## inst/unitTests/test_transcripts.R

## Attach with library() rather than require(): if the annotation package
## is missing, library() stops here with a clear error, whereas require()
## only returns FALSE and the failure shows up later as a missing object.
library("Homo.sapiens")

## TODO: to speed up unit tests I need some small pieces of annotation
## for testing a small subset of granges and the matching metadata
## Ideally I want a Homo.sapiens package that uses the small subset DB
## from GenomicFeatures.  Perhaps a "mini-me" package for testing? -
## but making this is a bit of a project.

## Fixtures shared by the helper tests: the Homo.sapiens object itself and
## the object that OrganismDbi's internal .getTxDb() returns for it (the
## helper tests pass it to transcripts()).
x <- Homo.sapiens
txdb <- OrganismDbi:::.getTxDb(x)


## some internal testing (make sure helpers work as expected)

## Exercises the internal helper .compressMetadata() on the metadata of the
## first 100 transcripts and checks that the result is a DataFrame with
## 100 rows and 4 columns, all of them among the requested columns.
test_compressMetadata <- function(){
    wanted <- c("SYMBOL","GENENAME", "TXCHROM", "PMID")
    ## only the first 100 transcripts, to keep the test short
    txSubset <- transcripts(txdb, columns="tx_id")[1:100]
    txIds <- mcols(txSubset)$tx_id
    annot <- select(x, keys=as.character(txIds), wanted, "TXID")
    ## grouping factor whose levels follow the order of the transcripts
    grouping <- factor(annot[["TXID"]], levels=mcols(txSubset)[["tx_id"]])
    compressed <- OrganismDbi:::.compressMetadata(grouping, annot, "TXID")
    checkTrue(is(compressed, "DataFrame"))
    resDims <- dim(compressed)
    checkTrue(resDims[2] == 4)
    checkTrue(resDims[1] == 100)
    checkTrue(all(colnames(compressed) %in% wanted))
}

## .combineMetadata is an important helper function.
## This test calls it on the first 100 transcripts and checks that the
## result is a DataFrame with 100 rows and 4 columns, all of them among
## the requested columns.
test_combineMetadata <- function(){
    wanted <- c("SYMBOL","GENENAME", "TXCHROM", "PMID")
    ## only the first 100 transcripts, to keep the test short
    txSubset <- transcripts(txdb, columns="tx_id")[1:100]
    txIds <- mcols(txSubset)$tx_id
    annot <- select(x, keys=as.character(txIds), wanted, "TXID")
    combined <- OrganismDbi:::.combineMetadata(txSubset, annot,
                                               avoidID="TXID",
                                               joinID="tx_id",
                                               columns=wanted)
    checkTrue(is(combined, "DataFrame"))
    resDims <- dim(combined)
    checkTrue(resDims[2] == 4)
    checkTrue(resDims[1] == 100)
    checkTrue(all(colnames(combined) %in% wanted))
}


## These tests are slow so I will need a smaller thing to test with...
## Checks that transcripts() on the Homo.sapiens object returns a GRanges
## with more than 80000 ranges whose metadata columns are all among the
## requested ones.
test_transcripts <- function(){
    library(Homo.sapiens)
    h <- Homo.sapiens
    cols <- c("TXNAME","SYMBOL")
    res <- transcripts(h, columns=cols)
    ## is() is the S4-aware class test; comparing class(res) with == is
    ## fragile and was inconsistent with the is() checks used elsewhere
    checkTrue(is(res, "GRanges"))
    checkTrue(length(res) > 80000)
    checkTrue(all(colnames(mcols(res)) %in% cols))
}

## Checks that exons() on the Homo.sapiens object returns a GRanges with
## more than 200000 ranges whose metadata columns are all among the
## requested ones.
test_exons <- function(){
    library(Homo.sapiens)
    h <- Homo.sapiens
    cols <- c("TXCHROM","REFSEQ")
    res <- exons(h, columns=cols)
    ## is() is the S4-aware class test (instead of class(res) == ...)
    checkTrue(is(res, "GRanges"))
    checkTrue(length(res) > 200000)
    checkTrue(all(colnames(mcols(res)) %in% cols))
}

## Checks that cds() on the Homo.sapiens object returns a GRanges with
## more than 200000 ranges whose metadata columns are all among the
## requested ones.
test_cds <- function(){
    library(Homo.sapiens)
    h <- Homo.sapiens
    cols <- c("GENENAME","SYMBOL")
    res <- cds(h, columns=cols)
    ## is() is the S4-aware class test (instead of class(res) == ...)
    checkTrue(is(res, "GRanges"))
    checkTrue(length(res) > 200000)
    checkTrue(all(colnames(mcols(res)) %in% cols))
}





## Checks transcriptsBy(by="gene") on the Homo.sapiens object, once with
## two requested columns and once with a single column.  Each result must
## be a CompressedGRangesList with more than 20000 elements, and its inner
## and outer metadata column names must stay within the expected sets.
test_transcriptsBy <- function(){
    library(Homo.sapiens)
    h <- Homo.sapiens
    cols <- c("GENENAME","SYMBOL")
    res <- transcriptsBy(h, by="gene", cols)
    checkTrue(is(res, "CompressedGRangesList"))
    checkTrue(length(res) > 20000)
    ## check inner mcols
    checkTrue(all(colnames(mcols(res[[1]])) %in%
                  c("tx_name","GENENAME","SYMBOL")))
    ## check outer mcols
    checkTrue(all(colnames(mcols(res)) %in%
                  c("GENEID","GENENAME","SYMBOL")))

    ## extra check for case where we only have one field.
    cols <- c("SYMBOL")
    res2 <- transcriptsBy(h, by="gene", cols)
    ## these two checks must look at res2; they previously re-tested res,
    ## so the class and length of the single-column result went unchecked
    checkTrue(is(res2, "CompressedGRangesList"))
    checkTrue(length(res2) > 20000)
    ## check inner mcols
    checkTrue(all(colnames(mcols(res2[[1]])) %in%
                  c("tx_name","SYMBOL")))
    ## outer mcols
    checkTrue(all(colnames(mcols(res2)) %in%
                  c("GENEID","SYMBOL")))
}

## Checks that exonsBy(by="gene") on the Homo.sapiens object returns a
## CompressedGRangesList with more than 20000 elements whose outer
## metadata columns are all among the requested ones.
test_exonsBy <- function(){
    library(Homo.sapiens)
    h <- Homo.sapiens
    cols <- c("GENENAME","SYMBOL")
    res <- exonsBy(h, by="gene", cols)
    ## TODO: look more closely at this one.  The metadata looks off...
    ## is() is the S4-aware class test (instead of class(res) == ...)
    checkTrue(is(res, "CompressedGRangesList"))
    checkTrue(length(res) > 20000)
    checkTrue(all(colnames(mcols(res)) %in% cols))
}

## Checks that cdsBy(by="gene") on the Homo.sapiens object returns a
## CompressedGRangesList with more than 19000 elements whose outer
## metadata columns are all among the requested ones.
test_cdsBy <- function(){
    library(Homo.sapiens)
    h <- Homo.sapiens
    cols <- c("GENENAME","SYMBOL")
    res <- cdsBy(h, by="gene", cols)
    ## is() is the S4-aware class test (instead of class(res) == ...)
    checkTrue(is(res, "CompressedGRangesList"))
    checkTrue(length(res) > 19000)
    checkTrue(all(colnames(mcols(res)) %in% cols))
}




#################
## This test addresses a bug that relates to the access of TXID and
## other column values that were used for "joining" data together.

## Regression test: requesting only "TXID" (a column also used for joining)
## must still work for transcripts() and transcriptsBy().
test_rangeMethods_for_JoinFailures <- function(){
    library(Homo.sapiens)
    h <- Homo.sapiens
    cols <- "TXID"
    res <- transcripts(h, columns=cols)
    ## "TXID" must be the one and only metadata column; identical() says so
    ## explicitly instead of relying on a vectorised == inside checkTrue()
    checkTrue(identical(names(mcols(res)), "TXID"))
    checkTrue(is(res, "GRanges"))
    checkTrue(length(res) > 10000)  ## large

    res <- transcriptsBy(h, by="gene", columns=cols)
    checkTrue("TXID" %in% names(mcols(res[[1]])))
    checkTrue(is(res, "CompressedGRangesList"))
    checkTrue(length(res) > 10000)  ## large
}



## Other Bug: for exons() 'filter' argument does not work on Homo.sapiens objects...

## Checks that the 'filter' argument of exons() works on the Homo.sapiens
## object, both without and with extra metadata columns: filtering on
## gene_id "1" must give a GRanges with fewer than 20 ranges.
test_filterArg <- function(){
    ## this works:
    res <- exons(Homo.sapiens, filter=list(gene_id="1"))
    ## is() is the S4-aware class test (instead of class(res) == ...)
    checkTrue(is(res, "GRanges"))
    checkTrue(length(res) < 20)  ## small

    ## so does this!
    res <- exons(Homo.sapiens, columns="SYMBOL", filter=list(gene_id="1"))
    checkTrue("SYMBOL" %in% names(mcols(res)))
    checkTrue(is(res, "GRanges"))
    checkTrue(length(res) < 20)  ## small
}


## Fast testing: BiocGenerics:::testPackage(pattern="^test_transcripts.*\\.R$")

## Try the OrganismDbi package in your browser
##
## Any scripts or data that you put into this service are public.
##
## OrganismDbi documentation built on Nov. 8, 2020, 5:50 p.m.