inst/unitTests/test_select-methods.R

## 1st we need to write tests for all the helper functions then we need more
## tests for select (generally)
## Why test the lower level helpers?  Because that way I will get a failure
## point right at the location where the trouble occurs (high resolution for
## trouble detection)._
require("TxDb.Hsapiens.UCSC.hg19.knownGene")
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
require("RUnit")
  
test_getTableColMapping <- function(){
  res <- GenomicFeatures:::.getTableColMapping(txdb)
  exp <- list(cds=c("_cds_id","cds_name","cds_chrom","cds_strand","cds_start",
                "cds_end"),
              exon=c("_exon_id","exon_name","exon_chrom","exon_strand",
                "exon_start","exon_end"),
              gene=c("gene_id","_tx_id"),
              splicing=c("_tx_id","exon_rank","_exon_id","_cds_id"),
              transcript=c("_tx_id","tx_name","tx_type","tx_chrom","tx_strand",
                "tx_start","tx_end"))
  checkIdentical(res, exp)
}

test_makeColAbbreviations <- function(){
  res <- GenomicFeatures:::.makeColAbbreviations(txdb)
  checkTrue(res[["_cds_id"]]=="CDSID")
  res2 <- GenomicFeatures:::.getTableColMapping(txdb)
  checkTrue(length(res)==21, length(unique(unlist(res2))))
}

test_reverseColAbbreviations <- function(){
  cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)
  res <- GenomicFeatures:::.reverseColAbbreviations(txdb, cnames)
  checkTrue(names(cnames)[[1]]==res[[1]])
  checkTrue(length(res) == length(cnames))
}

test_getTableNames <- function(){
  cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)
  res <- GenomicFeatures:::.getTableNames(txdb, cnames)
  ## Let's check the ones that fail more easily
  checkTrue(length(res[["_tx_id"]])==3)
  checkTrue(length(res[["_exon_id"]])==2)
  checkTrue(length(res[["_cds_id"]])==2)
}

test_getSimpleTableNames <- function(){
  cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)
  res <- GenomicFeatures:::.getSimpleTableNames(txdb, cnames)
  exp <- c("cds","splicing","exon","gene","transcript")
  checkIdentical(res, exp)
}

test_encodeSortedTableKey <- function(){
  sTNames <- c("s", "e", "t")
  res <- GenomicFeatures:::.encodeSortedTableKey(sTNames)
  checkIdentical(res,"tse")
}

test_makeTableKey <- function(){
  cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)
  res <- GenomicFeatures:::.makeTableKey(txdb, cnames)
  checkIdentical(res,"gtsec")
}

test_missingTableInterpolator <- function(){
  tName <- "x"
  res <- GenomicFeatures:::.missingTableInterpolator(tName)
  checkIdentical(res,"x")
  tName <- "se"
  res <- GenomicFeatures:::.missingTableInterpolator(tName)
  checkIdentical(res,"tse")
}

test_tableJoinSelector <- function(){
  tName <- "t"
  res <- GenomicFeatures:::.tableJoinSelector(tName)
  checkIdentical(res,"transcript")
  tName <- "gt"
  res <- GenomicFeatures:::.tableJoinSelector(tName)
  checkIdentical(res, "(SELECT * FROM transcript LEFT JOIN gene  ON (transcript._tx_id = gene._tx_id) )")
  tName <- "FOOBAZZLE"
  checkException(GenomicFeatures:::.tableJoinSelector(tName))
}

test_makeSelectList <- function(){
  cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)[c("_cds_id","_tx_id")]
  res <- GenomicFeatures:::.makeSelectList(txdb, cnames)
  exp <- "c._cds_id, g._tx_id" ## 2nd one will be a "g."
  checkIdentical(res, exp)
}

test_makeAsList <- function(){
  cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)[c("_cds_id","_tx_id")]
  res <- GenomicFeatures:::.makeAsList(txdb, cnames)
  exp <- "cds AS c, splicing AS s, gene AS g, transcript AS t"
  checkIdentical(res, exp)
}

test_makeJoinSQL <- function(){
  cnames <- GenomicFeatures:::.makeColAbbreviations(txdb)[c("_cds_id","_tx_id")]
  res <- GenomicFeatures:::.makeJoinSQL(txdb, cnames)
  exp <- "(SELECT * FROM transcript LEFT JOIN gene  ON (transcript._tx_id = gene._tx_id) INNER JOIN splicing  ON (transcript._tx_id = splicing._tx_id)  LEFT JOIN cds ON (splicing._cds_id = cds._cds_id) )"
  checkIdentical(res, exp)
}

test_makeKeyList <- function(){
  ks <- 1:6
  kt <- "TXID"
  res <- GenomicFeatures:::.makeKeyList(txdb, keys=ks, keytype=kt)
  exp <- "g._tx_id IN ( '1','2','3','4','5','6' )"
  checkIdentical(res, exp)
}


test_keys <- function(){
  checkException(keys(txdb, keytype="CDSCHROM"))
}

test_keys_advancedArgs <- function(){
    k1 <- keys(txdb, keytype="TXNAME")
    checkTrue("uc001aaa.3" %in% k1)
    
    k2 <- keys(txdb, keytype="TXNAME", pattern=".2$")
    checkTrue("uc001aaq.2" %in% k2)
    checkTrue(!("uc001aaa.3" %in% k2))
    checkTrue(length(k2) < length(k1))

    l1 <- length(keys(txdb, keytype="TXID", column="GENEID"))
    l2 <- length(keys(txdb, keytype="TXID"))
    checkTrue(l1 < l2)
    
    k3 <- head(keys(txdb, keytype="GENEID", pattern=".2$",
                    column="TXNAME", fuzzy=TRUE))
    res <- suppressWarnings( select(txdb, k3, columns=c("GENEID","TXNAME"),
                                   keytype="GENEID"))
    checkTrue(any(grepl(".2$",res$TXNAME)))
}



test_select <- function(){
  keys = head(keys(txdb, "GENEID"))
  cols = c("GENEID")
  res <- select(txdb, keys, cols, keytype="GENEID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("GENEID"), colnames(res))
  checkTrue(length(res$GENEID)==length(keys))
  checkIdentical(res$GENEID, keys)

  keys = head(keys(txdb, "TXID"))
  cols = c("TXID")
  res <- select(txdb, keys, cols, keytype="TXID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("TXID"), colnames(res))
  checkTrue(length(res$TXID)==length(keys))
  checkIdentical(res$TXID, keys)
 
  keys = head(keys(txdb, "GENEID"))
  cols = c("GENEID","TXID")
  res <- select(txdb, keys, cols, keytype="GENEID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("GENEID","TXID"), colnames(res))
  checkTrue(length(unique(res$GENEID))==length(keys))

  keys = head(keys(txdb, "GENEID"))
  cols = c("GENEID","TXID", "EXONRANK")
  res <- select(txdb, keys, cols, keytype="GENEID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("GENEID","TXID","EXONRANK"), colnames(res))
  checkTrue(length(unique(res$GENEID))==length(keys))

  keys = head(keys(txdb, "GENEID"))
  cols = c("GENEID","TXID", "EXONRANK","CDSID")
  res <- select(txdb, keys, cols, keytype="GENEID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("GENEID","CDSID","TXID","EXONRANK"), colnames(res))
  checkTrue(length(unique(res$GENEID))==length(keys))

  ## It's really cosmetic but: should the order of the final data.frame match
  ## the order of the cols?
  ## I think so, except that we may add a col for keys (even if not requested)
  ## if added, such a col should be in front.
  
  keys = head(keys(txdb, "GENEID"))
  cols = c("GENEID","TXID", "EXONRANK", "EXONID")
  res <- select(txdb, keys, cols, keytype="GENEID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("GENEID","EXONID","TXID","EXONRANK"), colnames(res))
  checkTrue(length(unique(res$GENEID))==length(keys))

  keys = head(keys(txdb, "GENEID"))
  cols = c("GENEID","TXID", "EXONRANK", "EXONID", "CDSID")
  res <- select(txdb, keys, cols, keytype="GENEID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("GENEID","CDSID","EXONID","TXID","EXONRANK"), colnames(res))
  checkTrue(length(unique(res$GENEID))==length(keys))
  
  keys = head(keys(txdb, "TXID"))
  cols = c("TXID", "EXONRANK", "EXONID", "CDSID")
  res <- select(txdb, keys, cols, keytype="TXID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("TXID","CDSID","EXONID","EXONRANK"), colnames(res))
  checkTrue(length(unique(res$TXID))==length(keys))

  keys = head(keys(txdb, "EXONID"))
  cols = c("EXONRANK", "EXONID", "CDSID")
  res <- select(txdb, keys, cols, keytype="EXONID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("EXONID","CDSID","EXONRANK"), colnames(res))
  checkTrue(length(unique(res$EXONID))==length(keys))
  
  keys = head(keys(txdb, "TXNAME"))
  cols = c("GENEID","TXNAME", "CDSID", "EXONSTART")
  res <- select(txdb, keys, cols, keytype="TXNAME")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("TXNAME","CDSID","EXONSTART","GENEID"), colnames(res))  
  checkTrue(length(unique(res$TXNAME))==length(keys))

  
  keys = head(keys(txdb, "TXNAME"))
  cols = c("GENEID", "EXONSTART","TXNAME")
  res <- select(txdb, keys, cols, keytype="TXNAME")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("TXNAME","EXONSTART","GENEID"), colnames(res))
  checkTrue(length(unique(res$TXNAME))==length(keys))
    
  
  keys = head(keys(txdb, "TXNAME"))
  cols = c("GENEID", "CDSID","TXNAME")
  res <- select(txdb, keys, cols, keytype="TXNAME")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("TXNAME","CDSID","GENEID"), colnames(res))
  checkTrue(length(unique(res$TXNAME))==length(keys))
    
  
  keys = head(keys(txdb, "TXID"))
  cols = c("GENEID","TXNAME", "TXID")
  res <- select(txdb, keys, cols, keytype="TXID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("TXID","GENEID","TXNAME"), colnames(res))
  checkTrue(length(unique(res$TXID))==length(keys))
  ## For this particular case, we want to make sure that the TXNAMES are not
  ## being copied (there should be one unique one for each ID in this range)
  checkTrue(length(unique(res$TXNAME)) == length(res$TXNAME))
  
  keys = head(keys(txdb, "CDSNAME"))
  cols = c("GENEID","TXNAME", "TXID", "CDSNAME")
  checkException(select(txdb, keys, cols, keytype="CDSNAME"), silent=TRUE)
  
  keys = head(keys(txdb, "CDSID"))
  cols = c("GENEID","TXNAME", "TXID", "CDSNAME")
  res <- select(txdb, keys, cols, keytype="CDSID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols)+1) ## this is one where we ADD an extra!
  checkIdentical(c("CDSID","CDSNAME","GENEID","TXID","TXNAME"), colnames(res))
  checkTrue(length(unique(res$CDSID))==length(keys))

  
  ## stress test (this used to take way too long)
  keys = head(keys(txdb, "GENEID"))
  cols = c("GENEID","CDSSTART")
  res <- select(txdb, keys, cols, keytype="GENEID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("GENEID","CDSSTART"), colnames(res))
    
}

test_select_isActiveSeq <- function(){
  
  ## set isActiveSeq to only watch chr1
 txdb <- restoreSeqlevels(txdb)  ## This is to reset things (safety measure)
 isActiveSeq(txdb)[seqlevels(txdb)] <- FALSE
 isActiveSeq(txdb) <- c("chr1"=TRUE)  
  
  ## then use select
  keys <- head(keys(txdb, "GENEID"))
  cols <- c("GENEID","CDSSTART", "CDSCHROM")
  res <- select(txdb, keys, columns = cols, keytype="GENEID")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("GENEID","CDSCHROM","CDSSTART"), colnames(res))
  uniqChrs <- unique(res$CDSCHROM)[!is.na(unique(res$CDSCHROM))]
  checkIdentical(c("chr1"),uniqChrs)

  ## keys must contain keys that match to more than one thing
  keys <- c(head(keys(txdb,keytype="TXNAME")),
            tail(keys(txdb,keytype="TXNAME")))
  cols <- c("TXNAME","TXCHROM","TXSTRAND")
  res <- select(txdb, keys, columns = cols, keytype="TXNAME")
  checkTrue(dim(res)[1]>0)
  checkTrue(dim(res)[2]==length(cols))
  checkIdentical(c("TXNAME","TXCHROM","TXSTRAND"), colnames(res))
  uniqChrs <- unique(res$TXCHROM)[!is.na(unique(res$TXCHROM))]
  checkIdentical(c("chr1"),uniqChrs)  
}
Bioconductor/GenomicFeatures documentation built on May 4, 2024, 4:43 p.m.