## 1st we need to write tests for all the helper functions then we need more
## tests for select (generally)
## Why test the lower level helpers? Because that way I will get a failure
## point right at the location where the trouble occurs (high resolution for
## trouble detection)._
require("TxDb.Hsapiens.UCSC.hg19.knownGene")
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
require("RUnit")
test_getTableColMapping <- function(){
res <- GenomicFeatures:::.getTableColMapping(txdb)
exp <- list(cds=c("_cds_id","cds_name","cds_chrom","cds_strand","cds_start",
"cds_end"),
exon=c("_exon_id","exon_name","exon_chrom","exon_strand",
"exon_start","exon_end"),
gene=c("gene_id","_tx_id"),
splicing=c("_tx_id","exon_rank","_exon_id","_cds_id"),
transcript=c("_tx_id","tx_name","tx_type","tx_chrom","tx_strand",
"tx_start","tx_end"))
checkIdentical(res, exp)
}
test_makeColAbbreviations <- function(){
res <- GenomicFeatures:::.makeColAbbreviations(txdb)
checkTrue(res[["_cds_id"]]=="CDSID")
res2 <- GenomicFeatures:::.getTableColMapping(txdb)
checkTrue(length(res)==21, length(unique(unlist(res2))))
}
test_reverseColAbbreviations <- function(){
cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)
res <- GenomicFeatures:::.reverseColAbbreviations(txdb, cnames)
checkTrue(names(cnames)[[1]]==res[[1]])
checkTrue(length(res) == length(cnames))
}
test_getTableNames <- function(){
cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)
res <- GenomicFeatures:::.getTableNames(txdb, cnames)
## Let's check the ones that fail more easily
checkTrue(length(res[["_tx_id"]])==3)
checkTrue(length(res[["_exon_id"]])==2)
checkTrue(length(res[["_cds_id"]])==2)
}
test_getSimpleTableNames <- function(){
cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)
res <- GenomicFeatures:::.getSimpleTableNames(txdb, cnames)
exp <- c("cds","splicing","exon","gene","transcript")
checkIdentical(res, exp)
}
test_encodeSortedTableKey <- function(){
sTNames <- c("s", "e", "t")
res <- GenomicFeatures:::.encodeSortedTableKey(sTNames)
checkIdentical(res,"tse")
}
test_makeTableKey <- function(){
cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)
res <- GenomicFeatures:::.makeTableKey(txdb, cnames)
checkIdentical(res,"gtsec")
}
test_missingTableInterpolator <- function(){
tName <- "x"
res <- GenomicFeatures:::.missingTableInterpolator(tName)
checkIdentical(res,"x")
tName <- "se"
res <- GenomicFeatures:::.missingTableInterpolator(tName)
checkIdentical(res,"tse")
}
test_tableJoinSelector <- function(){
tName <- "t"
res <- GenomicFeatures:::.tableJoinSelector(tName)
checkIdentical(res,"transcript")
tName <- "gt"
res <- GenomicFeatures:::.tableJoinSelector(tName)
checkIdentical(res, "(SELECT * FROM transcript LEFT JOIN gene ON (transcript._tx_id = gene._tx_id) )")
tName <- "FOOBAZZLE"
checkException(GenomicFeatures:::.tableJoinSelector(tName))
}
test_makeSelectList <- function(){
cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)[c("_cds_id","_tx_id")]
res <- GenomicFeatures:::.makeSelectList(txdb, cnames)
exp <- "c._cds_id, g._tx_id" ## 2nd one will be a "g."
checkIdentical(res, exp)
}
test_makeAsList <- function(){
cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)[c("_cds_id","_tx_id")]
res <- GenomicFeatures:::.makeAsList(txdb, cnames)
exp <- "cds AS c, splicing AS s, gene AS g, transcript AS t"
checkIdentical(res, exp)
}
test_makeJoinSQL <- function(){
cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)[c("_cds_id","_tx_id")]
res <- GenomicFeatures:::.makeJoinSQL(txdb, cnames)
exp <- "(SELECT * FROM transcript LEFT JOIN gene ON (transcript._tx_id = gene._tx_id) INNER JOIN splicing ON (transcript._tx_id = splicing._tx_id) LEFT JOIN cds ON (splicing._cds_id = cds._cds_id) )"
checkIdentical(res, exp)
}
test_makeKeyList <- function(){
ks <- 1:6
kt <- "TXID"
res <- GenomicFeatures:::.makeKeyList(txdb, keys=ks, keytype=kt)
exp <- "g._tx_id IN ( '1','2','3','4','5','6' )"
checkIdentical(res, exp)
}
test_keys <- function(){
checkException(keys(txdb, keytype="CDSCHROM"))
}
test_keys_advancedArgs <- function(){
k1 <- keys(txdb, keytype="TXNAME")
checkTrue("uc001aaa.3" %in% k1)
k2 <- keys(txdb, keytype="TXNAME", pattern=".2$")
checkTrue("uc001aaq.2" %in% k2)
checkTrue(!("uc001aaa.3" %in% k2))
checkTrue(length(k2) < length(k1))
l1 <- length(keys(txdb, keytype="TXID", column="GENEID"))
l2 <- length(keys(txdb, keytype="TXID"))
checkTrue(l1 < l2)
k3 <- head(keys(txdb, keytype="GENEID", pattern=".2$",
column="TXNAME", fuzzy=TRUE))
res <- suppressWarnings( select(txdb, k3, columns=c("GENEID","TXNAME"),
keytype="GENEID"))
checkTrue(any(grepl(".2$",res$TXNAME)))
}
test_select <- function(){
keys = head(keys(txdb, "GENEID"))
cols = c("GENEID")
res <- select(txdb, keys, cols, keytype="GENEID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("GENEID"), colnames(res))
checkTrue(length(res$GENEID)==length(keys))
checkIdentical(res$GENEID, keys)
keys = head(keys(txdb, "TXID"))
cols = c("TXID")
res <- select(txdb, keys, cols, keytype="TXID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("TXID"), colnames(res))
checkTrue(length(res$TXID)==length(keys))
checkIdentical(res$TXID, keys)
keys = head(keys(txdb, "GENEID"))
cols = c("GENEID","TXID")
res <- select(txdb, keys, cols, keytype="GENEID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("GENEID","TXID"), colnames(res))
checkTrue(length(unique(res$GENEID))==length(keys))
keys = head(keys(txdb, "GENEID"))
cols = c("GENEID","TXID", "EXONRANK")
res <- select(txdb, keys, cols, keytype="GENEID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("GENEID","TXID","EXONRANK"), colnames(res))
checkTrue(length(unique(res$GENEID))==length(keys))
keys = head(keys(txdb, "GENEID"))
cols = c("GENEID","TXID", "EXONRANK","CDSID")
res <- select(txdb, keys, cols, keytype="GENEID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("GENEID","CDSID","TXID","EXONRANK"), colnames(res))
checkTrue(length(unique(res$GENEID))==length(keys))
## It's really cosmetic but: should the order of the final data.frame match
## the order of the cols?
## I think so, except that we may add a col for keys (even if not requested)
## if added, such a col should be in front.
keys = head(keys(txdb, "GENEID"))
cols = c("GENEID","TXID", "EXONRANK", "EXONID")
res <- select(txdb, keys, cols, keytype="GENEID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("GENEID","EXONID","TXID","EXONRANK"), colnames(res))
checkTrue(length(unique(res$GENEID))==length(keys))
keys = head(keys(txdb, "GENEID"))
cols = c("GENEID","TXID", "EXONRANK", "EXONID", "CDSID")
res <- select(txdb, keys, cols, keytype="GENEID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("GENEID","CDSID","EXONID","TXID","EXONRANK"), colnames(res))
checkTrue(length(unique(res$GENEID))==length(keys))
keys = head(keys(txdb, "TXID"))
cols = c("TXID", "EXONRANK", "EXONID", "CDSID")
res <- select(txdb, keys, cols, keytype="TXID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("TXID","CDSID","EXONID","EXONRANK"), colnames(res))
checkTrue(length(unique(res$TXID))==length(keys))
keys = head(keys(txdb, "EXONID"))
cols = c("EXONRANK", "EXONID", "CDSID")
res <- select(txdb, keys, cols, keytype="EXONID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("EXONID","CDSID","EXONRANK"), colnames(res))
checkTrue(length(unique(res$EXONID))==length(keys))
keys = head(keys(txdb, "TXNAME"))
cols = c("GENEID","TXNAME", "CDSID", "EXONSTART")
res <- select(txdb, keys, cols, keytype="TXNAME")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("TXNAME","CDSID","EXONSTART","GENEID"), colnames(res))
checkTrue(length(unique(res$TXNAME))==length(keys))
keys = head(keys(txdb, "TXNAME"))
cols = c("GENEID", "EXONSTART","TXNAME")
res <- select(txdb, keys, cols, keytype="TXNAME")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("TXNAME","EXONSTART","GENEID"), colnames(res))
checkTrue(length(unique(res$TXNAME))==length(keys))
keys = head(keys(txdb, "TXNAME"))
cols = c("GENEID", "CDSID","TXNAME")
res <- select(txdb, keys, cols, keytype="TXNAME")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("TXNAME","CDSID","GENEID"), colnames(res))
checkTrue(length(unique(res$TXNAME))==length(keys))
keys = head(keys(txdb, "TXID"))
cols = c("GENEID","TXNAME", "TXID")
res <- select(txdb, keys, cols, keytype="TXID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("TXID","GENEID","TXNAME"), colnames(res))
checkTrue(length(unique(res$TXID))==length(keys))
## For this particular case, we want to make sure that the TXNAMES are not
## being copied (there should be one unique one for each ID in this range)
checkTrue(length(unique(res$TXNAME)) == length(res$TXNAME))
keys = head(keys(txdb, "CDSNAME"))
cols = c("GENEID","TXNAME", "TXID", "CDSNAME")
checkException(select(txdb, keys, cols, keytype="CDSNAME"), silent=TRUE)
keys = head(keys(txdb, "CDSID"))
cols = c("GENEID","TXNAME", "TXID", "CDSNAME")
res <- select(txdb, keys, cols, keytype="CDSID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols)+1) ## this is one where we ADD an extra!
checkIdentical(c("CDSID","CDSNAME","GENEID","TXID","TXNAME"), colnames(res))
checkTrue(length(unique(res$CDSID))==length(keys))
## stress test (this used to take way too long)
keys = head(keys(txdb, "GENEID"))
cols = c("GENEID","CDSSTART")
res <- select(txdb, keys, cols, keytype="GENEID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("GENEID","CDSSTART"), colnames(res))
}
test_select_isActiveSeq <- function(){
## set isActiveSeq to only watch chr1
txdb <- restoreSeqlevels(txdb) ## This is to reset things (safety measure)
isActiveSeq(txdb)[seqlevels(txdb)] <- FALSE
isActiveSeq(txdb) <- c("chr1"=TRUE)
## then use select
keys <- head(keys(txdb, "GENEID"))
cols <- c("GENEID","CDSSTART", "CDSCHROM")
res <- select(txdb, keys, columns = cols, keytype="GENEID")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("GENEID","CDSCHROM","CDSSTART"), colnames(res))
uniqChrs <- unique(res$CDSCHROM)[!is.na(unique(res$CDSCHROM))]
checkIdentical(c("chr1"),uniqChrs)
## keys must contain keys that match to more than one thing
keys <- c(head(keys(txdb,keytype="TXNAME")),
tail(keys(txdb,keytype="TXNAME")))
cols <- c("TXNAME","TXCHROM","TXSTRAND")
res <- select(txdb, keys, columns = cols, keytype="TXNAME")
checkTrue(dim(res)[1]>0)
checkTrue(dim(res)[2]==length(cols))
checkIdentical(c("TXNAME","TXCHROM","TXSTRAND"), colnames(res))
uniqChrs <- unique(res$TXCHROM)[!is.na(unique(res$TXCHROM))]
checkIdentical(c("chr1"),uniqChrs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.