Nothing
#currently no tests for the presupplied masks, correct.db.names, the Database and TableSchemaList classes.
check.table.import <- function(dta, tbsl, name, pks=paste(name, "ind", sep="_"))
{
expect_is(tbsl, "TableSchemaList")
expect_named(tbsl@tab.list, name)
#are the column names ok
expect_equal(length(intersect(tbsl@tab.list[[name]]$db.cols, union(names(dta), pks))), length(union(tbsl@tab.list[[name]]$db.cols, union(names(dta), pks))))
#are the column types ok in a basic sense
##just removes autoincremented PKs
tab.cols <- sapply(names(dta), function(x) class(dta[,x]))
tab.cols <- toupper(tab.cols)
tab.cols[tab.cols %in% c("CHARACTER", "FACTOR")] <- "TEXT"
#just because its encoded as numeric in the df doesn't mean it should be...
actually.ints <- mapply(function(x,y){
if (x == "NUMERIC"){
isTRUE(all.equal(floor(y), y))
}else{
F
}
}, tab.cols, as.list(dta))
tab.cols[actually.ints] <- "INTEGER"
expect_identical(tab.cols, tbsl@tab.list[[name]]$db.schema[names(tab.cols)])
}
#these are simple relationships so the keys will be equivalent and there should be no modifications of columns etc
check.direct.keys <- function(tbsl, from, to, key.name, orig.to.obj, orig.from.obj)
{
expect_named(tbsl@tab.list[[to]]$foreign.keys, from)
expect_identical(tbsl@tab.list[[to]]$foreign.keys[[from]]$local.keys, tbsl@tab.list[[to]]$foreign.keys[[from]]$ext.keys)
expect_identical(tbsl@tab.list[[to]]$foreign.keys[[from]]$local.keys, key.name)
expect_identical(tbsl@tab.list[[to]]$db.col, orig.to.obj@tab.list[[to]]$db.col)
expect_identical(tbsl@tab.list[[to]]$db.schema, orig.to.obj@tab.list[[to]]$db.schema)
expect_identical(tbsl@tab.list[[from]]$db.col, orig.from.obj@tab.list[[from]]$db.col)
expect_identical(tbsl@tab.list[[from]]$db.schema, orig.from.obj@tab.list[[from]]$db.schema)
}
convert.factors.to.strings <- function(dta)
{
for(i in 1:ncol(dta))
{
if(class(dta[,i]) == "factor")
{
dta[,i] <- as.character(dta[,i])
}
}
return(dta)
}
test_that("Create and work with TBSL and Database objects in a basic sense",
{
#makeSchemaFromData, append and length
baseball.teams <- new("TableSchemaList")
expect_equal(length(baseball.teams), 0)
franches <- makeSchemaFromData(TeamsFranchises, name="team_franch")
check.table.import(TeamsFranchises, franches, "team_franch")
baseball.teams <- append(baseball.teams, franches)
expect_equal(length(baseball.teams), 1)
teams <- makeSchemaFromData(Teams, name="teams")
check.table.import(Teams, teams, "teams")
baseball.teams <- append(baseball.teams, teams)
expect_equal(length(baseball.teams), 2)
salaries <- makeSchemaFromData(Salaries, name="salaries")
check.table.import(Salaries, salaries, "salaries")
baseball.teams <- append(baseball.teams, salaries)
expect_equal(length(baseball.teams), 3)
#relationships
relationship(baseball.teams, from="team_franch", to="teams") <- franchID ~ franchID
check.direct.keys(baseball.teams, from="team_franch", to="teams", key.name="franchID", orig.to.obj=teams, orig.from.obj=franches)
relationship(baseball.teams, from="teams", to="salaries") <- teamID ~ teamID
check.direct.keys(baseball.teams, from="teams", to="salaries", key.name="teamID", orig.to.obj=salaries, orig.from.obj=teams)
#helpers for TableSchemaLists
col.list <- columns(baseball.teams)
expect_named(col.list, c("team_franch", "teams", "salaries"))
expect_equal(col.list, list(team_franch=c("team_franch_ind", names(TeamsFranchises)), teams=c("teams_ind", names(Teams)), salaries=c("salaries_ind", names(Salaries))))
expect_equal(tables(baseball.teams), c("team_franch", "teams", "salaries"))
#Basic formation and checks of Database objects
temp.db <- tempfile()
baseball.db <- Database(baseball.teams, temp.db)
#columns
expect_equal(columns(baseball.db), columns(baseball.teams))
#tables
expect_equal(tables(baseball.db), tables(baseball.teams))
#dbFile
expect_equal(dbFile(baseball.db), temp.db)
#schema
expect_equal(schema(baseball.db)@tab.list, baseball.teams@tab.list)
#maybe not the best way to do this, though haven't seen another aside from re-creating the object
assign(x="baseball.db", value=baseball.db, envir=.GlobalEnv)
})
test_that("Another, more complex TBSL example based off a sample tracking use case",{
db.list <- test.db.1()
sample.tracking <- new("TableSchemaList")
clinical <- makeSchemaFromData(db.list$clinical, name="clinical")
check.table.import(db.list$clinical, clinical, "clinical")
sample.tracking <- append(sample.tracking, clinical)
expect_equal(length(sample.tracking), 1)
#this one should fail due to ng.ul column
expect_error(makeSchemaFromData(db.list$dna, name="dna"))
db.list$dna <- correct.df.names(db.list$dna)
dna <- makeSchemaFromData(db.list$dna, name="dna")
check.table.import(db.list$dna, dna, "dna")
sample.tracking <- append(sample.tracking, dna)
samples <- makeSchemaFromData(db.list$samples, name="samples")
check.table.import(db.list$samples, samples, "samples")
sample.tracking <- append(sample.tracking, samples)
expect_equal(length(sample.tracking), 3)
#more complicated usage of relationship
relationship(sample.tracking, from="clinical", to="samples") <- sample_id~sample_id
check.direct.keys(sample.tracking, from="clinical", to="samples", key.name="sample_id", orig.to.obj=samples, orig.from.obj=clinical)
relationship(sample.tracking, from="clinical", to="dna") <-sample_id~sample_id
check.direct.keys(sample.tracking, from="clinical", to="dna", key.name="sample_id", orig.to.obj=dna, orig.from.obj=clinical)
#Here, db.cols (and db.schema) should be modified so that sample and wave in samples should be replaced with dna's autoinc pk
relationship(sample.tracking, from="dna", to="samples") <- .~sample_id+wave
#if there was no clinical to samples rels would expect below however will need to keep sample_id to be consistent with other relationships
#expect_equal(sort(sample.tracking@tab.list$samples$db.cols), sort(c("samples_ind", "dna_ind", names(db.list$samples)[names(db.list$samples) %in% c("sample_id", "wave")==F])))
expect_equal(sort(sample.tracking@tab.list$samples$db.cols), sort(c("samples_ind", "dna_ind", names(db.list$samples)[names(db.list$samples) %in% "wave"==F])))
#this should not be true for dna
expect_equal(sort(sample.tracking@tab.list$dna$db.cols), sort(c("dna_ind", names(db.list$dna))))
#also check that the keys look sane
expect_named(sample.tracking@tab.list$samples$foreign.keys, c("clinical", "dna"))
#should just be the direct keys for clinical
expect_equal(sample.tracking@tab.list$samples$foreign.keys$clinical, list(local.keys="sample_id", ext.keys="sample_id"))
#should be sample_id and wave as well as dna's pk
expect_equal(sample.tracking@tab.list$samples$foreign.keys$dna, list(local.keys="dna_ind", ext.keys=c("sample_id", "wave")))
assign(x="sample.tracking", value=sample.tracking, envir=.GlobalEnv)
})
test_that("constraint<- method is sane",{
constraint(sample.tracking, "dna") <- ~ sample_id + wave
expect_equal(gsub("\\s+", "", sample.tracking@tab.list$dna$db.constr), gsub("\\s+", "", "CONSTRAINT dna_idx UNIQUE (sample_id, wave)"))
expect_true(grepl(gsub("\\s+", "", "CONSTRAINT dna_idx UNIQUE (sample_id, wave)"), gsub("\\s+", "", createTable(sample.tracking, 'dna', mode='normal')), fixed=T))
expect_true(grepl("INSERT\\s+OR\\s+IGNORE", insertStatement(sample.tracking, 'dna')))
constraint(sample.tracking, "dna", should.ignore=F, constr.name="test") <- ~ sample_id + wave
expect_identical(gsub("\\s+", "", sample.tracking@tab.list$dna$db.constr), gsub("\\s+", "", "CONSTRAINT test UNIQUE (sample_id, wave)"))
expect_true(sample.tracking@tab.list$dna$should.ignore == F)
expect_true(grepl(gsub("\\s+", "", "CONSTRAINT test UNIQUE (sample_id, wave)"), gsub("\\s+", "", createTable(sample.tracking, 'dna', mode='normal')), fixed=T))
expect_true(grepl("INSERT\\s+INTO", insertStatement(sample.tracking, 'dna')))
constraint(sample.tracking, "dna", should.ignore=F) <- NULL
expect_true(sample.tracking@tab.list$dna$db.constr == "")
expect_true(sample.tracking@tab.list$dna$should.ignore == F)
})
test_that("createTable",
{
tbsl <- sample.tracking
valid.tables <- names(tbsl@tab.list)
db.con <- dbConnect(RSQLite::SQLite(), tempfile())
for(i in valid.tables)
{
#print(i)
for (j in c("normal", "merge"))
{
#print(j)
f.keys <- tbsl@tab.list[[i]]$foreign.keys
#if there are no foreign keys available, don't allow create table statements to be generated
if (j == "merge" && poplite:::shouldMerge(tbsl, i)==F)
{
expect_error(createTable(tbsl, table.name=i, mode="merge"))
}
else
{
if (is.null(f.keys) || j == "normal")
{
add.cols <- character(0)
add.type <- character(0)
add.pk <- integer(0)
}
else
{
#the basic table should already exist so can retrieve the previous info on coltyps
temp.prag <- do.call("rbind", lapply(names(f.keys), function(x)
{
dbGetQuery(db.con, paste("pragma table_info(",x,")"))
}))
key.vals <- as.character(unlist(sapply(f.keys, "[[", "ext.keys")))
key.prag <- temp.prag[temp.prag$name %in% key.vals,]
add.cols <- key.prag$name
add.type <- key.prag$type
add.pk <- rep(0L, length(add.type))
}
prag.tab.name <- ifelse(j=="merge", paste0(i, "_temp"), i)
expect_true(dbExecute(db.con, createTable(tbsl, table.name=i, mode=j)) == 0)
tab.prag <- dbGetQuery(db.con, paste("pragma table_info(",prag.tab.name,")"))
sub.prag <- tab.prag[,c("name", "type", "pk")]
col.types <- sapply(strsplit(tbsl@tab.list[[i]]$db.schema, "\\s+"), "[", 1)
col.names <- tbsl@tab.list[[i]]$db.cols
is.pk <- as.integer(grepl("PRIMARY KEY", tbsl@tab.list[[i]]$db.schema))
col.names <- append(col.names, add.cols)
col.types <- append(col.types, add.type)
is.pk <- append(is.pk, add.pk)
query.dta <- data.frame(name=col.names, type=col.types, pk=is.pk, stringsAsFactors=FALSE)
if (j == "merge")
{
#need to add in constrains similar to shouldMerge here...
keep.cols <- unlist(lapply(f.keys, function(x)
{
if (length(intersect(x$local.keys, x$ext.keys)) == length(union(x$local.keys, x$ext.keys)))
{
return(x$local.keys)
}else{
return(NULL)
}
}))
#there can be some duplicates in this scenario
query.dta <- query.dta[!duplicated(query.dta),]
#remove the local keys except for those that are 'direct'
locs <- unique(sapply(f.keys, "[[", "local.keys"))
rm.locs <- setdiff(locs, keep.cols)
query.dta <- query.dta[query.dta$pk == 0 & query.dta$name %in% rm.locs == F,]
}
ord.prag <- sub.prag[do.call("order", sub.prag),]
ord.query <- query.dta[do.call("order", query.dta),]
rownames(ord.prag) <- NULL
rownames(ord.query) <- NULL
expect_equal(ord.prag, ord.query)
}
}
}
dbDisconnect(db.con)
})
test_that("insertStatement", {
set.seed(123)
tbsl <- sample.tracking
valid.tables <- names(tbsl@tab.list)
db.con <- dbConnect(RSQLite::SQLite(), tempfile())
for(i in valid.tables)
{
#print(i)
for(j in c("normal", "merge"))
{
#print(j)
f.keys <- tbsl@tab.list[[i]]$foreign.keys
if (j == "merge" && poplite:::shouldMerge(tbsl, i)==F)
{
expect_error(insertStatement(tbsl, i, j))
}
else
{
#first create the tables
dbExecute(db.con, createTable(tbsl, table.name=i, mode=j))
prag.tab.name <- ifelse(j=="merge", paste0(i, "_temp"), i)
tab.prag <- dbGetQuery(db.con, paste("pragma table_info(",prag.tab.name,")"))
#create a couple lines of fake data to insert into the database
ins.dta <- as.data.frame(matrix(sample.int(10000, 10*nrow(tab.prag)), ncol=nrow(tab.prag), nrow=10, dimnames=list(NULL, tab.prag$name)), stringsAsFactors=fALSE)
for(p in colnames(ins.dta))
{
if (tab.prag$type[tab.prag$name == p] == "TEXT")
{
ins.dta[,p] <- as.character(ins.dta[,p])
}
}
#load into the database
res <- dbSendStatement(db.con, insertStatement(tbsl, i, mode=j))
dbBind(res, ins.dta)
expect_true(dbGetRowsAffected(res) == nrow(ins.dta))
dbClearResult(res)
#check whether it respects should.ignore
ignore.match <- regexpr(pattern="INSERT\\s+OR\\s+IGNORE", text=insertStatement(tbsl, i, mode=j), perl=TRUE)
if (tbsl@tab.list[[i]]$should.ignore)
{
expect_true(ignore.match != -1)
}
else
{
expect_true(ignore.match == -1)
}
}
}
}
dbDisconnect(db.con)
})
test_that("mergeStatement",
{
tbsl <- sample.tracking
valid.tables <- names(tbsl@tab.list)
for(i in valid.tables)
{
#again if there are no foreign keys make sure the query dies
f.keys <- tbsl@tab.list[[i]]$foreign.keys
#print(i)
if (is.null(f.keys))
{
expect_error(mergeStatement(tbsl, i))
}
else
{
#only consider the f.keys which are not direct
is.direct.keys <- sapply(f.keys, function(x) length(intersect(x$local.keys, x$ext.keys)) == length(union(x$local.keys, x$ext.keys)))
f.keys <- f.keys[is.direct.keys == F]
cur.stat <- mergeStatement(tbsl, i)
#is the insert statement table definition consistent
tab.match <- regexpr(pattern=paste0(i, "\\s+\\(\\s+([\\w+_,]+)\\s+\\)"), text=cur.stat, perl=TRUE)
tab.str <- substr(cur.stat, start=attr(tab.match, "capture.start"), stop=attr(tab.match, "capture.start")+attr(tab.match, "capture.length")-1)
split.tab <- strsplit(tab.str, ",")[[1]]
tab.cols <- tbsl@tab.list[[i]]$db.cols
tab.cols <- tab.cols[tbsl@tab.list[[i]]$db.schema != "INTEGER PRIMARY KEY AUTOINCREMENT"]
expect_true(length(intersect(split.tab, tab.cols)) == length(union(split.tab, tab.cols)))
#is the select statement consistent with the insert statement table definition
##need to take into account the specified tables for the statements.
##for now will simply remove the table names, prior to checking. If a problem in the future
##can make sure such columns exist as well.
select.match <- regexpr(pattern=paste0("SELECT\\s+", tab.str), text=gsub("[\\w_]+\\.", "", cur.stat, perl=T), perl=TRUE)
expect_true(select.match != -1)
#are the joins sane
join.base <- sapply(f.keys, function(x) paste0("\\(", paste(x$ext.keys, collapse=","), "\\)"))
join.str <- paste0(i, "_temp\\s+", paste(paste("JOIN", names(join.base), "USING", join.base, sep="\\s+"), collapse="\\s+"))
join.match <- regexpr(pattern=join.str, text=cur.stat, perl=TRUE)
expect_true(join.match != -1)
#is it respecting should.ignore
ignore.match <- regexpr(pattern="INSERT\\s+OR\\s+IGNORE", text=cur.stat, perl=TRUE)
if (tbsl@tab.list[[i]]$should.ignore)
{
expect_true(ignore.match != -1)
}else
{
expect_true(ignore.match == -1)
}
}
}
})
test_that("Database population",{
#simple example first
ins.vals <- list(team_franch=TeamsFranchises, teams=Teams, salaries=Salaries)
#populate the entire database
do.call(populate, append(list(baseball.db), ins.vals))
#attempt to also add in an empty data.frame, which should simply not add anything to the db
empty.df <- TeamsFranchises[F,]
populate(baseball.db, team_franch=empty.df, use.tables="team_franch")
#read back in each of the tables and make sure they are consistent with in memory data.frames
test.con <- dbConnect(RSQLite::SQLite(), dbFile(baseball.db))
expect_true(all(names(ins.vals) %in% dbListTables(test.con)))
db.tab.list <- lapply(names(ins.vals), function(x)
{
dbReadTable(test.con, x)
})
names(db.tab.list) <- names(ins.vals)
#these should all have 'table_ind' columns
for (i in names(db.tab.list))
{
expect_true(paste(i, "ind", sep="_") %in% names(db.tab.list[[i]]))
}
#these relationships are all 'direct' keys so check for equivalence between the lists removing the 'table'_ind columns
stripped.db.list <- lapply(db.tab.list, function(x) x[,-1])
fixed.ins <- lapply(ins.vals, convert.factors.to.strings)
expect_equal(stripped.db.list,fixed.ins)
dbDisconnect(test.con)
#more complex example
sample.tracking.db <- Database(sample.tracking, tempfile())
samp.list <- test.db.1()
#from above, this correction had to be done
samp.list$dna <- correct.df.names(samp.list$dna)
do.call(populate, append(list(sample.tracking.db), samp.list))
#again read in the db tables
test.con <- dbConnect(RSQLite::SQLite(), dbFile(sample.tracking.db))
expect_true(all(names(samp.list) %in% dbListTables(test.con)))
db.tab.list <- lapply(names(samp.list), function(x)
{
dbReadTable(test.con, x)
})
names(db.tab.list) <- names(samp.list)
#all should have a 'table'_ind column
for (i in names(db.tab.list))
{
expect_true(paste(i, "ind", sep="_") %in% names(db.tab.list[[i]]))
}
#for clinical and dna, should just be the 'table'_ind columns as above
stripped.db.list.1 <- lapply(db.tab.list[c('clinical', 'dna')], function(x) x[,-1])
fixed.samp.list <- lapply(samp.list , convert.factors.to.strings)
expect_equal(stripped.db.list.1,fixed.samp.list[c('clinical', 'dna')], check.attributes=F)
#for samples, will be both 'table'_ind column as well as a 'foreign table'_ind derived from dna and a 'direct' key from clinical
##merge this table with dna to pull out the appropriate column
test.samples <- samp.list[['samples']]
#add in the samples_ind in this case
test.samples$samples_ind <- 1:nrow(test.samples)
test.dna <- samp.list[['dna']]
#it is simply autoincremented PK
test.dna$dna_ind <- 1:nrow(test.dna)
test.samples.merge <- merge(test.samples, test.dna, all=F)
test.samples.merge <- test.samples.merge[,names(db.tab.list[['samples']])]
test.samples.merge <- convert.factors.to.strings(test.samples.merge)
test.samples.merge <- test.samples.merge[do.call('order', test.samples.merge),]
res.samples <- db.tab.list$samples
res.samples <- res.samples[do.call('order', res.samples),]
#remove the sample_inds here, as they don't really matter and seem to misalign, probably due to differences in assignment
expect_equal(test.samples.merge[,-1], res.samples[,-1], check.attributes=F)
dbDisconnect(test.con)
#make sure that populate only provides the desired input...
temp.db <- Database(sample.tracking, tempfile())
expect_error(populate(temp.db, samples=samp.list$clinical, use.tables="clinical"))
assign(x="sample.tracking.db", value=sample.tracking.db, envir=.GlobalEnv)
})
#remaining open connection...
test_that("Querying with Database objects",
{
#onto querying Database objects
#sample.tracking.db
test.con <- dbConnect(RSQLite::SQLite(), dbFile(sample.tracking.db))
#start with some basic select queries
db.tab.list <- lapply(tables(sample.tracking.db), function(x)
{
dbReadTable(test.con, x)
})
dbDisconnect(test.con)
names(db.tab.list) <- tables(sample.tracking.db)
db.samps <- db.tab.list[["samples"]]
#the .tables keyword should select all the columns on the given table
all.samps <- select(sample.tracking.db, .tables="samples")
expect_equal(as.data.frame(all.samps), db.samps)
#the se version:
#Warning messages:
# 1: select_() is deprecated.
# Please use select() instead
#
# The 'programming' vignette or the tidyeval book can help you
# to program with select() : https://tidyeval.tidyverse.org
all.samps.se <- select_(sample.tracking.db, .dots=list(.tables="samples"))
expect_equal(as.data.frame(all.samps.se), db.samps)
#try specifying all the tables
all.tab.join <- select(sample.tracking.db, .tables=tables(sample.tracking.db))
merge.dta <- db.tab.list[[1]]
for(i in seq_along(db.tab.list[-1]))
{
merge.dta <- merge(merge.dta, db.tab.list[-1][[i]])
}
atj.dta <- convert.factors.to.strings(as.data.frame(all.tab.join)[,names(merge.dta)])
atj.dta <- atj.dta[do.call("order", atj.dta),]
merge.dta <- convert.factors.to.strings(merge.dta)
merge.dta <- merge.dta[do.call("order", merge.dta),]
expect_equal(atj.dta, merge.dta, check.attributes=F)
#again the se version
all.tab.join.se <- select_(sample.tracking.db, .dots=list(.tables=tables(sample.tracking.db)))
atj.dta.se <- convert.factors.to.strings(as.data.frame(all.tab.join.se)[,names(merge.dta)])
atj.dta.se <- atj.dta.se[do.call("order", atj.dta.se),]
expect_equal(atj.dta.se, merge.dta, check.attributes=F)
#specifying a subset of the tables
sub.tab.join <- select(sample.tracking.db, .tables=c("samples", "dna"))
merge.dta <- merge(db.tab.list[["samples"]], db.tab.list[["dna"]])
stj.dta <- convert.factors.to.strings(as.data.frame(sub.tab.join)[,names(merge.dta)])
stj.dta <- stj.dta[do.call("order", stj.dta),]
merge.dta <- convert.factors.to.strings(merge.dta)
merge.dta <- merge.dta[do.call("order", merge.dta),]
expect_equal(stj.dta, merge.dta, check.attributes=F)
#specifying table, couple of columns
sub.samps <- select(sample.tracking.db, sample_id:dna_ind,.tables="samples")
expect_equal(as.data.frame(sub.samps), db.samps[,c("sample_id", "did_collect", "dna_ind")])
#se version
sub.samps.se <- select_(sample.tracking.db, "sample_id:dna_ind",.dots=list(.tables="samples"))
expect_equal(as.data.frame(sub.samps.se), db.samps[,c("sample_id", "did_collect", "dna_ind")])
#subsetting again without specifying tables
sub.samps.nt <- select(sample.tracking.db, sample_id:dna_ind)
expect_equal(as.data.frame(sub.samps.nt), db.samps[,c("sample_id", "did_collect", "dna_ind")])
#se version
sub.samps.nt.se <- select_(sample.tracking.db, "sample_id:dna_ind")
expect_equal(as.data.frame(sub.samps.nt.se), db.samps[,c("sample_id", "did_collect", "dna_ind")])
#shouldn't work if the subsetting is ambigous
expect_error(select(sample.tracking.db, dna_ind))
#will work if table is specified
#either
expect_equal(as.data.frame(select(sample.tracking.db, dna_ind, .tables="dna")), data.frame(db.tab.list$dna$dna_ind), check.attributes=F)
#or
expect_equal(as.data.frame(select(sample.tracking.db, dna.dna_ind)), data.frame(db.tab.list$dna$dna_ind), check.attributes=F)
#can select columns across different tables
use.cols <- unique(append(names(db.tab.list$samples)[-1], c("sample_id", "sex", "age", "status")))
two.tab.cols <- merge(db.tab.list$samples, db.tab.list$clinical, by="sample_id", all=F)
nt.res <- as.data.frame(select(sample.tracking.db, sample_id:dna_ind, sample_id:status))
expect_true(all(names(nt.res) %in% use.cols))
nt.comp <- nt.res[do.call("order", nt.res[,use.cols]),use.cols]
two.tab.cols <- two.tab.cols[do.call("order", two.tab.cols[,use.cols]),use.cols]
expect_equal(nt.comp, two.tab.cols, check.attributes=F)
#again specifying tables
td.res <- as.data.frame(select(sample.tracking.db, samples.sample_id:dna_ind, clinical.sample_id:status))
expect_true(all(names(td.res) %in% use.cols))
td.res <- td.res[do.call("order", td.res[,use.cols]),use.cols]
expect_equal(td.res, two.tab.cols, check.attributes=F)
#or via the .tables mechanism this should not work
expect_error(select(sample.tracking.db, sample_id:dna_ind, sample_id:status, .tables=c("clinical", "samples")))
#there were a few additional ones, check whether mixed named, unnamed works
nu.res <- as.data.frame(select(sample.tracking.db, samples.sample_id:dna_ind, sample_id:status))
expect_true(all(names(nu.res) %in% use.cols))
nu.res <- nu.res[do.call("order", nu.res[,use.cols]), use.cols]
expect_equal(nu.res, two.tab.cols, check.attributes=F)
#bug that came up when preparing the examples
bug.1 <- select(baseball.db, yearID:WCWin, franchName)
expect_true(all(colnames(bug.1) %in% c("franchName", columns(baseball.db)$teams[which(columns(baseball.db)$teams == "yearID"):which(columns(baseball.db)$teams == "WCWin")])))
#onto filtering
#this shouldn't work as the sample_id column is ambigous
expect_error(filter(sample.tracking.db,sample_id == 1))
samp.1.filt <- filter(sample.tracking.db, samples.sample_id == 1)
samp.1.df <- as.data.frame(samp.1.filt)
#check against the table itself
expect_equal(samp.1.df, db.samps[db.samps$sample_id == 1,])
#standard eval version
#Warning message:
# filter_() is deprecated.
# Please use filter() instead
samp.1.filt.se <- filter_(sample.tracking.db, "samples.sample_id == 1")
samp.1.df.se <- as.data.frame(samp.1.filt.se)
expect_equal(samp.1.df.se, db.samps[db.samps$sample_id == 1,])
#or
samp.1.filt.se.2 <- filter_(sample.tracking.db, .dots=list("samples.sample_id == 1"))
samp.1.df.se.2 <- as.data.frame(samp.1.filt.se.2)
expect_equal(samp.1.df.se.2, db.samps[db.samps$sample_id == 1,])
#also should work like when unambigous:
status.res <- filter(sample.tracking.db, status == 1)
expect_equal(as.data.frame(status.res), db.tab.list$clinical[db.tab.list$clinical$status == 1,], check.attributes=F)
#multiple filters are not defined
expect_error(filter(sample.tracking.db, status == 1, sample_id==3))
#though you can do:
##as long as the columns are uniquely defined
clin.filt.res <- filter(sample.tracking.db, status == 1 & sample_id==3)
expect_equal(as.data.frame(clin.filt.res), db.tab.list$clinical[with(db.tab.list$clinical, status == 1 & sample_id == 3),], check.attributes=F)
#similar to also specifying columns
clin.filt.res.1 <- filter(sample.tracking.db, clinical.status == 1 & clinical.sample_id==3)
expect_equal(as.data.frame(clin.filt.res), as.data.frame(clin.filt.res.1))
#again, partially specifying columns
clin.filt.res.2 <- filter(sample.tracking.db, clinical.status == 1 & sample_id==3)
expect_equal(as.data.frame(clin.filt.res), as.data.frame(clin.filt.res.2))
#undefined columns should break
expect_error(filter(sample.tracking.db, kitten == 1 & sample_id==3))
#for filter, cross table queries should result in an outer join
#not currently supported, but in the future...
#filter(sample.tracking.db, clinical.status == 1 | dna.wave==2)
#filter(sample.tracking.db, clinical.(status == 1 & sex == "m") | dna.wave==2)
})
test_that("sample tracking example but with direct keys between dna and samples", {
db.list <- test.db.1()
sample.tracking <- new("TableSchemaList")
clinical <- makeSchemaFromData(db.list$clinical, name="clinical")
sample.tracking <- append(sample.tracking, clinical)
samples <- makeSchemaFromData(db.list$samples, name="samples")
sample.tracking <- append(sample.tracking, samples)
db.list$dna <- correct.df.names(db.list$dna)
dna <- makeSchemaFromData(db.list$dna, name="dna")
sample.tracking <- append(sample.tracking, dna)
relationship(sample.tracking, from="clinical", to="samples") <- sample_id~sample_id
relationship(sample.tracking, from="clinical", to="dna") <-sample_id~sample_id
#Here, db.cols (and db.schema) should be modified so that sample and wave in samples should be replaced with dna's autoinc pk
relationship(sample.tracking, from="samples", to="dna") <- sample_id+wave~sample_id+wave
temp.st.db <- Database(sample.tracking, tempfile())
do.call(populate, append(list(temp.st.db),db.list))
#this query was seen to fail
samp.dna.join <- select(temp.st.db, .tables=c("samples", "dna"))
test.con <- dbConnect(RSQLite::SQLite(), dbFile(temp.st.db))
#start with some basic select queries
db.tab.list <- lapply(tables(temp.st.db), function(x)
{
dbReadTable(test.con, x)
})
names(db.tab.list) <- tables(temp.st.db)
samp.dna.merge <- merge(db.tab.list$samples, db.tab.list$dna, by=c("sample_id", "wave"), all=F)
samp.dna.merge <- convert.factors.to.strings(samp.dna.merge)
samp.dna.merge <- samp.dna.merge[do.call("order", samp.dna.merge),]
samp.dna.join <- convert.factors.to.strings(as.data.frame(samp.dna.join))[,names(samp.dna.merge)]
samp.dna.join <- samp.dna.join[do.call("order", samp.dna.join),]
expect_equal(samp.dna.merge, samp.dna.join, check.attributes=F)
dbDisconnect(test.con)
})
#igraph::plot.igraph(poplite:::tsl.to.graph(om.schema.obj))
test_that("oligoMask queries that break poplite", {
om.schema.obj <- new("TableSchemaList",tab.list=test.schema.2)
relationship(om.schema.obj, from="allele", to="genotype") <- ref_id+allele_num~ref_id+allele_num
test.db <- tempfile()
temp.con <- dbConnect(RSQLite::SQLite(), test.db)
for(i in names(test.db.2))
{
dbWriteTable(conn=temp.con, name=i, test.db.2[[i]])
}
dbDisconnect(temp.con)
#make a database object
test.database.1 <- Database(om.schema.obj, test.db)
prob.tab <- as.data.frame(select(test.database.1, .tables="probe_info"))
expect_equal(prob.tab, test.db.2$probe_info)
#should all be from probe_info
prob.tab.2 <- as.data.frame(select(test.database.1, probe_id, fasta_name, align_status))
expect_equal(prob.tab.2, test.db.2$probe_info[,c("probe_id", "fasta_name", "align_status")])
all.tab.1 <- as.data.frame(select(test.database.1, .tables=tables(test.database.1)))
all.merge <- test.db.2[[1]]
for(i in names(test.db.2)[-1])
{
all.merge <- merge(all.merge, test.db.2[[i]], all=F)
}
expect_equal(all.tab.1[,names(all.merge)], all.merge)
all.tab.2 <- as.data.frame(select(test.database.1, probe_id, fasta_name, align_status, probe_chr, probe_start, probe_end, seqnames, start,
end, filter, geno_chr, genotype.allele_num, strain))
expect_equal(all.tab.2, all.merge[,c("probe_id", "fasta_name", "align_status", "probe_chr", "probe_start", "probe_end", "seqnames", "start",
"end", "filter", "geno_chr", "allele_num", "strain")])
sel.tab <- as.data.frame(select_(test.database.1, "probe_info.probe_id", "reference.ref_id", "reference.filter", "probe_info.align_status"))
expect_equal(sel.tab, all.merge[,c("probe_id", "ref_id", "filter", "align_status")])
})
test_that("VariantAnnotation basic tests", {
if (require("VariantAnnotation")==F) {
skip("VariantAnnotation not available")
}
fl <- system.file("extdata", "chr22.vcf.gz", package="VariantAnnotation")
vcf <- tryCatch(readVcf(fl, "hg19"), error=function(x) skip("VariantAnnotation can't read its own file"))
populate.ref.table <- function(vcf.obj)
{
ref.dta <- cbind(
seqnames=as.character(seqnames(vcf.obj)),
as.data.frame(ranges(vcf.obj))[,c("start", "end")],
ref=as.character(ref(vcf.obj)),
stringsAsFactors=FALSE
)
return(ref.dta)
}
vcf.sc <- makeSchemaFromFunction(populate.ref.table, "reference", vcf.obj=vcf[1:5])
populate.allele.table <- function(vcf.obj)
{
exp.obj <- expand(vcf.obj)
ref.dta <- cbind(
seqnames=as.character(seqnames(exp.obj)),
as.data.frame(ranges(exp.obj))[,c("start", "end")],
ref=as.character(ref(exp.obj)),
alt=as.character(alt(exp.obj)),
stringsAsFactors=FALSE
)
return(ref.dta)
}
allele.sc <- makeSchemaFromFunction(populate.allele.table, "alleles", vcf.obj=vcf[1:5])
vcf.sc <- poplite::append(vcf.sc, allele.sc)
populate.samp.alt.table <- function(vcf.obj)
{
temp.vrange <- as(vcf.obj, "VRanges")
ret.dta <- cbind(
seqnames=as.character(seqnames(temp.vrange)),
as.data.frame(ranges(temp.vrange))[,c("start", "end")],
ref=ref(temp.vrange),
alt=alt(temp.vrange),
sample=as.character(sampleNames(temp.vrange)),
allele_count=sapply(strsplit(temp.vrange$GT, "\\|"),
function(x) sum(as.integer(x), na.rm=T)),
stringsAsFactors=F
)
return(ret.dta[ret.dta$allele_count > 0,])
}
geno.all.sc <- makeSchemaFromFunction(populate.samp.alt.table, "sample_alleles", vcf.obj=vcf[1:5])
vcf.sc <- poplite::append(vcf.sc, geno.all.sc)
relationship(vcf.sc, from="reference", to="alleles") <- .~seqnames+start+end+ref
relationship(vcf.sc, from="reference", to="sample_alleles") <- .~seqnames+start+end+ref
relationship(vcf.sc, from="alleles", to="sample_alleles") <- .~.reference+alt
vcf.db <- Database(vcf.sc, tempfile())
populate(vcf.db, vcf.obj=vcf[1:1000])
populate(vcf.db, vcf.obj=vcf[1001:2000])
pop.res <- as.data.frame(poplite::select(vcf.db, .tables=tables(vcf.db)))
vrange.tab <- as(vcf[1:2000], "VRanges")
vrange.dta <- data.frame(seqnames=as.character(seqnames(vrange.tab)),
start=start(vrange.tab),
end=end(vrange.tab),
ref=as.character(ref(vrange.tab)),
alt=as.character(alt(vrange.tab)),
sample=as.character(sampleNames(vrange.tab)),
allele_count=sapply(strsplit(vrange.tab$GT, "\\|"),
function(x) sum(as.integer(x), na.rm=T)),
stringsAsFactors=F)
vrange.dta <- vrange.dta[vrange.dta$allele_count > 0,]
vrange.dta <- vrange.dta[do.call("order", vrange.dta),]
sub.pop.res <- pop.res[,names(vrange.dta)]
sub.pop.res <- sub.pop.res[do.call("order", sub.pop.res),]
expect_true(isTRUE(all.equal(sub.pop.res, vrange.dta, check.attributes=F)))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.