############################################################
## Compare MySQL vs SQLite backends:
## Amazing how inefficient the MySQL backend seems to be! Most
## likely it's due to RMySQL, not MySQL.
dontrun_test_MySQL_vs_SQLite <- function() {
## Compare the performance of the MySQL backend against
## the SQLite backend.
edb_mysql <- useMySQL(edb, user = "anonuser", pass = "")
library(microbenchmark)
## genes
microbenchmark(genes(edb), genes(edb_mysql), times = 5)
microbenchmark(genes(edb, filter = GeneBiotypeFilter("lincRNA")),
genes(edb_mysql, filter = GeneBiotypeFilter("lincRNA")),
times = 5)
microbenchmark(genes(edb, filter = SeqNameFilter(20:23)),
genes(edb_mysql, filter = SeqNameFilter(20:23)),
times = 5)
microbenchmark(genes(edb, columns = "tx_id"),
genes(edb_mysql, columns = "tx_id"),
times = 5)
microbenchmark(genes(edb, filter = GenenameFilter("BCL2L11")),
genes(edb_mysql, filter = GenenameFilter("BCL2L11")),
times = 5)
## transcripts
microbenchmark(transcripts(edb),
transcripts(edb_mysql),
times = 5)
microbenchmark(transcripts(edb, filter = GenenameFilter("BCL2L11")),
transcripts(edb_mysql, filter = GenenameFilter("BCL2L11")),
times = 5)
## exons
microbenchmark(exons(edb),
exons(edb_mysql),
times = 5)
microbenchmark(exons(edb, filter = GenenameFilter("BCL2L11")),
exons(edb_mysql, filter = GenenameFilter("BCL2L11")),
times = 5)
## exonsBy
microbenchmark(exonsBy(edb),
exonsBy(edb_mysql),
times = 5)
microbenchmark(exonsBy(edb, filter = SeqNameFilter("Y")),
exonsBy(edb_mysql, filter = SeqNameFilter("Y")),
times = 5)
## cdsBy
microbenchmark(cdsBy(edb), cdsBy(edb_mysql), times = 5)
microbenchmark(cdsBy(edb, by = "gene"), cdsBy(edb_mysql, by = "gene"),
times = 5)
microbenchmark(cdsBy(edb, filter = SeqStrandFilter("-")),
cdsBy(edb_mysql, filter = SeqStrandFilter("-")),
times = 5)
}
## Compare the performance of doing the sorting within R or
## directly in the SQL query.
dontrun_test_ordering_performance <- function() {
library(RUnit)
library(RSQLite)
## gene table: order by in SQL query vs R:
db_con <- dbconn(edb)
.callWithOrder <- function(con, query, orderBy = "",
orderSQL = TRUE) {
if (all(orderBy == ""))
orderBy <- NULL
if (orderSQL & !is.null(orderBy)) {
orderBy <- paste(orderBy, collapse = ", ")
query <- paste0(query, " order by ", orderBy)
}
res <- dbGetQuery(con, query)
if (!orderSQL & !all(is.null(orderBy))) {
if (!all(orderBy %in% colnames(res)))
stop("orderBy not in columns!")
## Do the ordering in R
res <- res[do.call(order,
c(list(method = "radix"),
as.list(res[, orderBy, drop = FALSE]))), ]
}
rownames(res) <- NULL
return(res)
}
#######################
## gene table
## Simple condition
the_q <- "select * from gene"
system.time(res1 <- .callWithOrder(db_con, query = the_q))
system.time(res2 <- .callWithOrder(db_con, query = the_q,
orderSQL = FALSE))
checkIdentical(res1, res2)
## order by gene_id
orderBy <- "gene_id"
system.time(res1 <- .callWithOrder(db_con, query = the_q, orderBy = orderBy))
system.time(res2 <- .callWithOrder(db_con, query = the_q,
orderBy = orderBy, orderSQL = FALSE))
## SQL: 0.16, R: 0.164.
checkIdentical(res1, res2)
## order by gene_name
orderBy <- "gene_name"
system.time(res1 <- .callWithOrder(db_con, query = the_q, orderBy = orderBy))
system.time(res2 <- .callWithOrder(db_con, query = the_q,
orderBy = orderBy, orderSQL = FALSE))
checkIdentical(res1, res2)
## SQL: 0.245, R: 0.185
## sort by gene_name and gene_seq_start
orderBy <- c("gene_name", "gene_seq_start")
system.time(res1 <- .callWithOrder(db_con, query = the_q, orderBy = orderBy))
system.time(res2 <- .callWithOrder(db_con, query = the_q,
orderBy = orderBy, orderSQL = FALSE))
## SQL: 0.26, R: 0.188
checkEquals(res1, res2)
## with subsetting:
the_q <- "select * from gene where seq_name in ('5', 'Y')"
orderBy <- c("gene_name", "gene_seq_start")
system.time(res1 <- .callWithOrder(db_con, query = the_q, orderBy = orderBy))
system.time(res2 <- .callWithOrder(db_con, query = the_q,
orderBy = orderBy, orderSQL = FALSE))
## SQL: 0.031, R: 0.024
checkEquals(res1, res2)
########################
## joining tables.
the_q <- paste0("select * from gene join tx on (gene.gene_id = tx.gene_id)",
" join tx2exon on (tx.tx_id = tx2exon.tx_id)")
orderBy <- c("tx_id", "exon_id")
system.time(res1 <- .callWithOrder(db_con, query = the_q, orderBy = orderBy))
system.time(res2 <- .callWithOrder(db_con, query = the_q,
orderBy = orderBy, orderSQL = FALSE))
## SQL: 9.6, R: 9.032
checkEquals(res1, res2)
## subsetting.
the_q <- paste0("select * from gene join tx on (gene.gene_id = tx.gene_id)",
" join tx2exon on (tx.tx_id = tx2exon.tx_id) where",
" seq_name = 'Y'")
orderBy <- c("tx_id", "exon_id")
system.time(res1 <- .callWithOrder(db_con, query = the_q, orderBy = orderBy))
system.time(res2 <- .callWithOrder(db_con, query = the_q,
orderBy = orderBy, orderSQL = FALSE))
## SQL: 0.9, R: 1.6
checkEquals(res1, res2)
}
## Compare the performance of inner join with left outer join.
dontrun_test_outer_join_performance <- function() {
Q_1 <- ensembldb:::joinQueryOnTables2(edb, tab = c("gene", "exon"))
Q_2 <- ensembldb:::joinQueryOnTables2(edb, tab = c("gene", "exon"),
startWith = "exon")
Q_3 <- ensembldb:::joinQueryOnTables2(edb, tab = c("gene", "exon"),
startWith = "exon",
join = "left outer join")
library(microbenchmark)
library(RSQLite)
microbenchmark(dbGetQuery(dbconn(edb), paste0("select * from ", Q_1)),
dbGetQuery(dbconn(edb), paste0("select * from ", Q_2)),
dbGetQuery(dbconn(edb), paste0("select * from ", Q_3)),
times = 10)
## Result: Q_1 is a second faster (13 instead of 14).
## Check performance joining tx and genes.
Q_1 <- ensembldb:::joinQueryOnTables2(edb, tab = c("tx", "gene"))
Q_2 <- ensembldb:::joinQueryOnTables2(edb, tab = c("tx", "gene"),
startWith = "tx")
Q_3 <- ensembldb:::joinQueryOnTables2(edb, tab = c("tx", "gene"),
startWith = "tx",
join = "left outer join")
microbenchmark(dbGetQuery(dbconn(edb), paste0("select * from ", Q_1)),
dbGetQuery(dbconn(edb), paste0("select * from ", Q_2)),
dbGetQuery(dbconn(edb), paste0("select * from ", Q_3)),
times = 10)
## No difference.
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.