################################################################
## Function 1: Take filenames and create MonetDBlite database ##
################################################################
#' makeDatabase
#'
#' Takes flat text files (comma, tab, or other separated) and converts
#' the data to a MonetDBLite database (<https://github.com/hannesmuehleisen/MonetDBLite>).
#' Given the filepath, file type (PLINK output only, at this time),
#' file extension/suffix and separator, the function automatically pulls
#' chromosome number and phenotype from the file name, marks the data with
#' that phenotype name, and sorts it into a queryable database.
#'
#' @param filesuffix A string indicating the file extension of the data files
#' to be sorted into the database.
#' @param srcfilepath A string indicating the location of the data to be
#' sorted into the database, by default the working directory. File path
#' must be absolute, not relative! (eg no "./")
#' @param dbfilepath A string indicating the location of the database, by default
#' the workind directory. File path must be absolute, not relative! (eg no "./")
#' @param separator A string indicating the separator between the phenotype
#' and chromosome in the first part of the file name, by default ".".
#' @param dbname A string giving the name of the folder that will contain the database, or
#' the name of an existing folder if the data is to be appended to the database. Default "MultiGWASdb".
#' @param ignore A vector of filenames to ignore, that will not be included in the
#' database. You only need to list files here that ALSO have the indicated file suffix-
#' files in the filepath without this suffix will already be ignored.
#' @param pattern An optional string vector to search the filenames for, to subset to only
#' a specific set of data to input (eg only certain chromosomes).
#' @param filetype A string indicating the filetype to be input. At this time only the default
#' "plink" is accepted.
#' @param delimiter A string giving the delimiter in the flat text file, by default tab
#' (\t).
#' @param overwrite Logical, indicating whether a database with the same dbname in the
#' same filepath should be overwritten to create a new database. Default FALSE.
#' @param append Logical, indicating whether data should be appended to a database with
#' the same dbname in the same filepath, if it exists. Default TRUE.
#'
#'
#'
#' @import DBI
#'
#'
#'
#'
#' @export
makeDatabase <- function(filesuffix, srcfilepath = getwd(), dbfilepath= getwd(), separator = ".", dbname = "MultiGWASdb", ignore = NULL,
pattern = NULL, filetype = "plink", delimiter = "\t", overwrite = FALSE, append = TRUE) {
# TODO add SNPTEST format input.
# TODO add ability to input manual column names for pvalue, OR, etc.
a <- proc.time()
# Check that filepath inputs are character strings
if(is.character(srcfilepath) == FALSE | is.character(filesuffix) == FALSE| is.character(separator) == FALSE |
is.character(dbname) == FALSE | is.character(dbfilepath) == FALSE) {
stop("filepaths, filesuffix, separator, and dbdir parameters must be character strings")
}
# Check that filetype parameter is one of the options given
if(filetype %in% c("plink", "snptest", "custom") == FALSE) {
stop("Filetype must be 'plink' or 'snptest'- see manual for description of file inputs")
}
if(filetype %in% c("snptest", "custom") == TRUE) {
stop("We do not yet support SNPTEST or custom header outputs. Please check lea-urpa.github.io/MultiGWASTools for updated development versions of MultiGWASTools")
}
# Check that list of files to ignore is a string of characters
if(!is.null(ignore) & !is.character(ignore)) {
stop("'ignore' must be a string of characters (filenames to ignore)")
}
# Check that logical append/overwrite are mutually exclusive
if(append == TRUE & overwrite == TRUE) {
stop("'append' and 'overwrite' cannot both be set to TRUE")
}
# Check that the given filepath actually exists
if(dir.exists(srcfilepath) == FALSE) {
stop("Given srcfilepath does not exist!")
}
if(dir.exists(dbfilepath) == FALSE) {
stop("Given dbfilepath does not exist!")
}
# If separator is problematic for searching with grep, fix it
if(separator == "."){ separator <- "[.]" }
# If there are files to ignore, wrap in object, otherwise give dummy string
if(!is.null(ignore)) {
ignorefiles <- ignore
} else {
ignorefiles <- "___empty___"
}
# Define database directory
dbdir <- paste(dbfilepath, dbname, sep= "/")
# Warn if append = TRUE and database doesn't exist
if(dir.exists(dbdir) == FALSE & append == TRUE) {
warning("Append set to TRUE, but specified directory does not exist. New database was created.")
}
# Stop if database exists and append & overwrite set to false
if(dir.exists(dbdir) == TRUE & append == FALSE & overwrite == FALSE) {
stop("Database in pathway with specfied dbname already exists. Set either append or overwrite to TRUE to continue.")
}
# If overwrite = TRUE, remove existing directory
if(overwrite == TRUE) {
if(dir.exists(dbdir) == TRUE) {
warning("Deleted and replaced previous database.")
unlink(dbdir, recursive = TRUE)
} else {
warning("Overwrite set to TRUE, but specified database directory does not exist. No data was overwritten.")
}
}
# Create folder if dbdir does not exist
if(dir.exists(dbdir) == FALSE) {
dir.create(dbdir)
}
# Generate list of files in specified filepath, report to user
file_list <- list.files(path=srcfilepath, pattern = paste("*",filesuffix, "$", sep = ""))
if(length(file_list) == 0) {
stop("There are no files with the specified file suffix in the filepath given.")
}
# Check if subset pattern given if so subset the file list
if(!is.null(pattern)) {
if(grepl("[.]", pattern) == TRUE) {
pattern <- gsub("[.]", "[.]", pattern)
}
file_list <- grep(pattern, file_list, value = TRUE)
}
print("List of data files identified in the folder:")
print(file_list)
# Define function to extract phenotype information
extract_phenotypes <- function(file) {
# Extract phenotype
pheno <- unlist(strsplit(file, separator))[1]
pheno <- paste("'",pheno,"'", sep = "")
return(pheno)
}
# Extract phenotypes from files
phenotypes <- sapply(file_list, extract_phenotypes, simplify = FALSE, USE.NAMES = TRUE)
b <- proc.time()
print(paste("Data check and phenotype extraction took:", (b - a)[3]))
# Set up database
a <- proc.time()
con <- DBI::dbConnect(MonetDBLite::MonetDBLite(), dbdir)
b <- proc.time()
print(paste("Connecting to DB took ", (b-a)[3]))
tables <- DBI::dbListTables(con)
# check if any tables exist
if( any(c("variants", "phenos", "tests", "results") %in% tables) == TRUE) {
if( any( c("variants", "phenos", "tests", "results") %in% tables) == FALSE){
stop("Some tables (variants, phenos, tests, or results) are missing from database!")
# Disconnect from database
DBI::dbDisconnect(con, shutdown=TRUE)
}
} else {
# Else create the tables
a <- proc.time()
# Start an SQL transaction
DBI::dbBegin(con)
DBI::dbSendQuery(con, "CREATE TABLE variants(
variantid INT PRIMARY KEY AUTO_INCREMENT,
chrom INT,
rsid TEXT,
pos INT,
ref TEXT,
alt1 TEXT,
firth TEXT,
obsct TEXT,
UNIQUE(variantid, chrom, rsid, pos, ref, alt1, firth, obsct)
);" )
DBI::dbSendQuery(con, "CREATE TABLE phenos(
phenoid INT PRIMARY KEY AUTO_INCREMENT,
phenoname TEXT,
UNIQUE(phenoid, phenoname)
);")
DBI::dbSendQuery(con, "CREATE TABLE tests(
testid INT PRIMARY KEY AUTO_INCREMENT,
testname TEXT,
UNIQUE(testid, testname)
);")
DBI::dbSendQuery(con, "CREATE TABLE results(
phenoid INT,
variantid INT,
testid INT,
oddsr FLOAT,
se FLOAT,
tstat FLOAT,
p FLOAT,
FOREIGN KEY (phenoid) REFERENCES phenos(phenoid),
FOREIGN KEY (variantid) REFERENCES variants(variantid),
FOREIGN KEY (testid) REFERENCES tests(testid)
);")
DBI::dbCommit(con)
b <- proc.time()
print(paste("Creating tables took", (b - a)[3] ))
}
# Loop to load in data
for(i in 1:length(file_list)) {
if(file_list[i] %in% ignorefiles == FALSE) {
# Print a ticker for the user
print(paste("Processing data into MonetDB database for file", file_list[i]))
a <- proc.time()
# Get phenotype
pheno <- phenotypes[file_list[i]]
# Get number of records in file
nrec <- system(paste("cat ", srcfilepath, "/", file_list[i], "| wc -l", sep = ""), intern = TRUE)
nrec <- as.numeric(nrec)
b <- proc.time()
print(paste("Getting pheno and number of records took", (b - a)[3]))
# Start an SQL transaction
DBI::dbBegin(con)
a <- proc.time()
# Manually define temptable and load in data
if(filetype == "plink" & i == 1) {
DBI::dbSendQuery(con, 'CREATE TABLE temptable (
"X.CHROM" INT,
"POS" INT,
"ID" TEXT,
"REF" TEXT,
"ALT1" TEXT,
"FIRTH." TEXT,
"TEST" TEXT,
"OBS_CT" INT,
"OR" FLOAT,
"SE" FLOAT,
"T_STAT" FLOAT,
"P" FLOAT
);')
# From what I understand float is OK, since we have less than 8 places accuracy in input data anyway
} else if (filetype == "snptest" | filetype == "custom") {
stop("snptest or custom file output not supported yet. How did you even get this far anyway?")
}
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
DBI::dbSendQuery(con, paste0("COPY ", nrec, " OFFSET 2 RECORDS INTO temptable FROM '", paste(srcfilepath, file_list[i], sep= "/"),
"' USING DELIMITERS '", delimiter, "','\n','\"' NULL as 'NA'"))
b <- proc.time()
print(paste("Creating temptable and inserting data took:", (b - a)[3] ))
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
# Extract data repeated for each rsid to variants table
a <- proc.time()
DBI::dbSendQuery(con, 'INSERT INTO variants (chrom, rsid, pos, ref, alt1, firth, obsct)
SELECT "X.CHROM","ID","POS", "REF", "ALT1","FIRTH.", "OBS_CT"
FROM temptable
WHERE NOT EXISTS (SELECT 1
FROM variants
WHERE variants.rsid = temptable."ID"
AND variants.ref = temptable."REF"
AND variants.alt1 = temptable."ALT1"
AND variants.firth = temptable."FIRTH."
AND variants.obsct = temptable."OBS_CT")
GROUP BY "X.CHROM","ID","POS", "REF", "ALT1","FIRTH.", "OBS_CT"' )
b <- proc.time()
print(paste("Extracting unique variants took", (b - a)[3] ))
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
# IF test not already in table, insert test
a <- proc.time()
DBI::dbSendQuery(con, 'INSERT INTO tests (testname)
SELECT "TEST"
FROM temptable
WHERE NOT EXISTS (SELECT 1
FROM tests
WHERE tests.testname = temptable."TEST")
GROUP BY "TEST"')
b <- proc.time()
print(paste("Extracting unique tests took", (b - a)[3] ))
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
# IF phenotype not already in table, insert phenotype into phenos table
a <- proc.time()
tablephenos <- DBI::dbGetQuery(con, 'SELECT phenoname FROM phenos')
pheno_noquote <- gsub("'", "", pheno)
if( pheno_noquote %in% tablephenos$phenoname == FALSE ) {
DBI::dbSendQuery(con, paste('INSERT INTO phenos (phenoname) VALUES (', pheno , ')'))
}
b <- proc.time()
print(paste("Extracting unique phenotype took", (b - a)[3] ))
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
a <- proc.time()
# Save phenoid as R object
phenoid <- DBI::dbGetQuery(con, paste('SELECT phenoid FROM phenos WHERE phenoname =', pheno))
# Add phenoid column to temp table, set value to your saved phenoid
DBI::dbSendQuery(con, 'ALTER TABLE temptable ADD phenoid INT')
DBI::dbSendQuery(con, paste('UPDATE temptable SET phenoid =', phenoid))
b <- proc.time()
print(paste("Identifying phenoid and inserting to temptable took", (b - a)[3] ))
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
# Create temptable2, insert data by left joining temptable with variants table
a <- proc.time()
if( i == 1){
DBI::dbSendQuery(con, 'CREATE TABLE temptable2 (
test TEXT,
oddsr FLOAT,
se FLOAT,
tstat FLOAT,
p FLOAT,
phenoid TEXT,
variantid TEXT
);')
}
b <- proc.time()
print(paste("Creating temptable2 took", (b - a)[3]))
a <- proc.time()
DBI::dbSendQuery(con, 'INSERT INTO temptable2 (test, oddsr, se, tstat, p, phenoid, variantid)
SELECT "TEST", "OR", "SE", "T_STAT", "P", phenoid, variantid
FROM temptable
LEFT JOIN variants
ON temptable."ID" = variants.rsid
AND temptable."REF" = variants.ref
AND temptable."ALT1" = variants.alt1
AND temptable."FIRTH." = variants.firth
AND temptable."OBS_CT" = variants.obsct
')
b <- proc.time()
print(paste("Inserting joined data to temptable2 took", (b - a)[3]))
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
# Create temptable3, insert data by left joining temptable2 with tests table
a <- proc.time()
if(i == 1){
DBI::dbSendQuery(con, 'CREATE TABLE temptable3 (
oddsr FLOAT,
se FLOAT,
tstat FLOAT,
p FLOAT,
phenoid TEXT,
variantid TEXT,
testid TEXT)')
}
b <- proc.time()
print(paste("Creating temptable3 took", (b-a)[3]))
a <- proc.time()
DBI::dbSendQuery(con, 'INSERT INTO temptable3 (oddsr, se, tstat, p, phenoid, variantid, testid)
SELECT oddsr, se, tstat, p, phenoid, variantid, testid
FROM temptable2
LEFT JOIN tests
ON temptable2.test = tests.testname')
b <- proc.time()
print(paste("Inserting joined data to temptable3 took", (b-a)[3]))
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
a <- proc.time()
# Insert important data into results table
DBI::dbSendQuery(con, 'INSERT INTO results (oddsr, se, tstat, p, phenoid, variantid, testid)
SELECT oddsr, se, tstat, p, phenoid, variantid, testid
FROM temptable3')
b <- proc.time()
print(paste("Inserting temptable data into results table took", (b - a)[3]))
DBI::dbCommit(con)
#Begin next transaction
DBI::dbBegin(con)
a <- proc.time()
# Drop or delete from temptables (also drops associated indices)
if(i == length(file_list)){
DBI::dbSendQuery(con, 'DROP TABLE temptable')
DBI::dbSendQuery(con, 'DROP TABLE temptable2')
DBI::dbSendQuery(con, 'DROP TABLE temptable3')
} else {
DBI::dbSendQuery(con, 'DELETE FROM temptable')
DBI::dbSendQuery(con, "ALTER TABLE temptable DROP COLUMN phenoid")
DBI::dbSendQuery(con, 'DELETE FROM temptable2')
DBI::dbSendQuery(con, 'DELETE FROM temptable3')
}
b <- proc.time()
print(paste("Deleting from temptables took", (b -a)[3] ))
DBI::dbCommit(con)
} else {
print(paste("Ignoring file", file_list[i]))
}
}
# Disconnect from database
DBI::dbDisconnect(con, shutdown=TRUE)
}
###############################################
## Function 1.5: Add indices on the database ##
###############################################
# This is faster than dropping and adding again the indices...
#' addIndices
#'
#' Connects to previously created database (currently only a MonetDBLite database
#' created with makeDatabase)
#' and adds indices to the database.
#'
#' @param dbdir A string giving the directory (filepath + folder name) of the database to
#' connect to. The file path must be absolute, not relative (eg no "./").
#' @param drop Logical indicating whether the indicies should be dropped (TRUE), otherwise the
#' indices will be created (FALSE).
#'
#' @import DBI
#'
#'
#'
#'
#' @export
addIndices <- function(dbdir, drop = FALSE){
if(dir.exists(dbdir) == FALSE) {
stop("Database directory specified does not exist")
}
# Connect to database
print(paste("Connecting to database ", dbdir))
a <- proc.time()
try(con <- DBI::dbConnect(MonetDBLite::MonetDBLite(), dbdir))
b <- proc.time()
print(paste("Connecting to database took", (b-a)[3]))
if (drop == FALSE){
#Begin next transaction
DBI::dbBegin(con)
print(paste0("Creating indices for ", dbdir))
a <- proc.time()
try(DBI::dbSendQuery(con, "CREATE INDEX Idx1 ON variants (rsid)"))
try(DBI::dbSendQuery(con, "CREATE INDEX Idx2 ON phenos (phenoname)"))
try(DBI::dbSendQuery(con, "CREATE INDEX Idx3 ON tests (testname)"))
try(DBI::dbSendQuery(con, "CREATE INDEX Idx4 ON results (testid)"))
try(DBI::dbSendQuery(con, "CREATE INDEX Idx5 ON results (variantid)"))
try(DBI::dbSendQuery(con, "CREATE INDEX Idx6 ON results (phenoid)"))
try(DBI::dbSendQuery(con, "CREATE INDEX Idx7 ON variants (variantid)"))
try(DBI::dbSendQuery(con, "CREATE INDEX Idx8 ON phenos (phenoid)"))
try(DBI::dbSendQuery(con, "CREATE INDEX Idx9 ON tests (testid)"))
b <- proc.time()
print(paste("Creating indices took", (b - a)[3]))
DBI::dbCommit(con)
} else if (drop == TRUE) {
#Begin next transaction
DBI::dbBegin(con)
print(paste("Dropping indices for ", dbdir))
a <- proc.time()
try(DBI::dbSendQuery(con, "DROP INDEX Idx1"))
try(DBI::dbSendQuery(con, "DROP INDEX Idx2"))
try(DBI::dbSendQuery(con, "DROP INDEX Idx3"))
try(DBI::dbSendQuery(con, "DROP INDEX Idx4"))
try(DBI::dbSendQuery(con, "DROP INDEX Idx5"))
try(DBI::dbSendQuery(con, "DROP INDEX Idx6"))
try(DBI::dbSendQuery(con, "DROP INDEX Idx7"))
try(DBI::dbSendQuery(con, "DROP INDEX Idx8"))
try(DBI::dbSendQuery(con, "DROP INDEX Idx9"))
b <- proc.time()
print(paste("Dropping indices took", (b - a)[3]))
DBI::dbCommit(con)
}
# Disconnect from database
a <- proc.time()
DBI::dbDisconnect(con, shutdown=TRUE)
b <- proc.time()
print(paste("Disconnecting from database took", (b-a)[3]))
}
#######################################################################################
## Function 2: Take list of rsids/ positions, extract data from database to R object ##
#######################################################################################
#' extractData
#'
#' Connects to previously created database (currently only a MonetDBLite database
#' created with makeDatabase)
#' and queries specific data from the database. Essentially a wrapper for
#' MonetDBLite query statements, without having to learn SQL query syntax. Users
#' can provide either a vector of rsids to query the database with OR a vector
#' of start/stop positions along a given chromosome. One can also threshold the
#' data with minimum p value, test (additive or other), or phenotype.
#'
#' @param dbdir A string giving the directory (filepath + folder name) of the database to
#' connect to. The file path must be absolute, not relative (eg no "./").
#' @param rsids A vector containing the rsids to query the database. Must provide
#' either rsids or positions, not both!
#' @param pos A vector containing three numbers, the chromosome, start and stop positions
#' along that chromosome to query the database. Must provide either rsids or
#' positions, not both!
#' @param pvalThresh A number indicating the minimum threshold of pvalues for the
#' data to return, by default all (pvalue = 1).
#' @param phenotype A string or vector of strings containing the phenotypes that will
#' be extracted from the data, by default all phenotypes.
#' @param test A string or vector of strings containing the tests that will be
#' extracted from the data, by default all tests.
#' @param dbtype A string indicating database type. Currently only the default "MonetDB" accepted.
#'
#' @import DBI
#'
#'
#'
#'
#' @export
extractData <- function( dbdir, rsids = NULL, pos = NULL, pvalThresh = 1, phenotype = "all", test = "all", dbtype = "MonetDB") {
# Check inputs
if(dbtype != "MonetDB") {
stop("dbtype must be a string indicating databaset type. MongoDB not yet supported.")
}
if(is.null(rsids) == TRUE & is.null(pos) == TRUE) {
stop("Specify either rsids or pos to query from database")
}
if(is.null(rsids) == FALSE & is.null(pos) == FALSE) {
stop("Specify only one: rsids or pos to query from database")
}
if(is.numeric(pvalThresh) == FALSE) {
stop(" pvalThresh argument must be a number")
}
if(dir.exists(dbdir) == FALSE) {
stop("Database directory specified does not exist")
}
if(is.null(pos) == FALSE ) {
if(is.vector(pos) == FALSE | length(pos) != 3) {
stop("Positions must be a vector of length three (chrom, start position, stop position)")
}
if(pos[1] %in% c(1:22,"x","y","X","Y") == FALSE) {
stop("Chromosome in pos vector must be 1-22, 'x', 'y', 'X', or 'Y'.")
}
if(pos[3] < pos[2]) {
stop("End position in pos vector must be greater than start position!")
}
}
# Connect to database
con <- DBI::dbConnect(MonetDBLite::MonetDBLite(), dbdir)
# If phenotype set to all in database, save that as a vector to iterate through
if(phenotype == "all") {
#Begin next transaction
DBI::dbBegin(con)
phenos <- DBI::dbGetQuery(con, "SELECT phenos.phenoname FROM phenos")
phenotype <- as.vector(phenos$phenoname)
DBI::dbCommit(con)
}
# If test set to all in database, save that as a vector to iterate through (limit as overshot of test)
if(test == "all") {
#Begin next transaction
DBI::dbBegin(con)
test <- DBI::dbGetQuery(con, "SELECT DISTINCT results.testid FROM results LIMIT 60")
test <- as.vector(test$test)
DBI::dbCommit(con)
}
# Extract data by user specifications
for(i in 1:length(phenotype)) {
for(j in 1:length(test)) {
if(is.null(rsids) == FALSE) {
# If rsids given, iterate through list of rsids
for(k in 1:length(rsids)) {
print(paste("Extracting data for pheno:", phenotype[i], "test:", test[j], "rsid:", rsids[k], "from database, where pval <", pvalThresh))
#Begin next transaction
DBI::dbBegin(con)
tempres <- DBI::dbGetQuery(con,
paste("SELECT rsid, chrom, pos, ref, alt1, firth, obsct, testname, phenoname, oddsr, se, tstat, p
FROM variants
JOIN results
ON results.variantid = variants.variantid
JOIN phenos
ON results.phenoid = phenos.phenoid
JOIN tests
ON results.testid = tests.testid
WHERE phenos.phenoname = \'", phenotype[i], "\' AND tests.testname = \'", test[j], "\' AND variants.rsid = \'", rsids[k], "\'",
"AND p <", pvalThresh, sep = ""
))
DBI::dbCommit(con)
if(nrow(tempres) == 0) {
warning(paste("No data in database for rsid:", rsids[k], " phenotype:", phenotype[i], "test:", test[j], " at pvalue less than", pvalThresh))
}
if(i == 1 & j== 1 & k == 1 ){
res <- tempres
} else {
res <- rbind(res, tempres)
}
}
} else {
# If positions given, extract all with specified values
print(paste("Extracting data for phenotype:", phenotype[i], "test:", test[j], "between pos", pos[2], "and", pos[3], "where pval <", pvalThresh ))
#Begin next transaction
DBI::dbBegin(con)
tempres <- DBI::dbGetQuery(con,
paste(" SELECT rsid, chrom, pos, ref, alt1, firth, obsct, testname, phenoname, oddsr, se, tstat, p
FROM variants
JOIN results
ON results.variantid = variants.variantid
JOIN phenos
ON results.phenoid = phenos.phenoid
JOIN tests
ON results.testid = tests.testid
WHERE phenos.phenoname = \'", phenotype[i], "\' AND tests.testname = \'", test[j], "\' AND p <", pvalThresh,
"AND chrom = ", pos[1], "AND pos BETWEEN ", pos[2], " AND ", pos[3], sep = ""
))
DBI::dbCommit(con)
if(nrow(tempres) == 0){
warning(paste("No data in database for phenotype:", phenotype[i], "test:", test[j], "between", pos[2], "and", pos[3], "at pvalue less than", pvalThresh))
}
if(i == 1 & j == 1 ) {
res <- tempres
} else {
res <- rbind(res, tempres)
}
}
}
}
# Disconnect from database
DBI::dbDisconnect(con, shutdown=TRUE)
return(res)
}
############################################################################################
## Small functions for users to check data: tables, fields in tables, and heads of tables ##
############################################################################################
#' listTables
#'
#' A convenience function, simply a wrapper for MonetDBLite's dbListTables
#'
#' @param dbdir A string giving the directory (filepath + folder name) of the database to
#' connect to. The file path must be absolute, not relative (eg no "./").
#'
#' @import DBI
#'
#'
#' @export
listTables <- function(dbdir) {
con <- DBI::dbConnect(MonetDBLite::MonetDBLite(), dbdir)
tables <- DBI::dbListTables(con)
DBI::dbDisconnect(con, shutdown=TRUE)
return(tables)
}
#' listColumnNames
#'
#' A convenience function, simply a wrapper for MonetDBLite's dbListFields
#'
#' @param dbdir A string giving the directory (filepath + folder name) of the database to
#' connect to. The file path must be absolute, not relative (eg no "./").
#' @param table A string giving the name of the table in the database whose column names
#' should be listed.
#'
#' @import DBI
#'
#'
#' @export
listColumnNames <- function(dbdir, table) {
if(is.character(table) == FALSE){
stop("table must be the name of one of the tables in your database (variants, phenos, tests, or results)")
}
con <- DBI::dbConnect(MonetDBLite::MonetDBLite(), dbdir)
names <- DBI::dbListFields(con, table)
DBI::dbDisconnect(con, shutdown=TRUE)
return(names)
}
#' headTable
#'
#' A convenience function, simply a wrapper for MonetDBLite's dbGetQuery, querying the
#' specified table for first n rows.
#'
#' @param dbdir A string giving the directory (filepath + folder name) of the database to
#' connect to. The file path must be absolute, not relative (eg no "./").
#' @param table A string giving the name of the table in the database whose column names
#' should be listed.
#' @param n A number giving the first n rows of the table that should be displated.
#'
#' @import DBI
#'
#'
#' @export
headTable <- function(dbdir, table, n) {
if(is.numeric(n) == FALSE) {
stop("n must be a number")
}
if(is.character(table) == FALSE) {
stop("table must be the name of one of the tables in your database (variants, phenos, tests, or results)")
}
con <- DBI::dbConnect(MonetDBLite::MonetDBLite(), dbdir)
#Begin next transaction
DBI::dbBegin(con)
head <- DBI::dbGetQuery(con, paste("SELECT * FROM", table, "LIMIT", n))
DBI::dbCommit(con)
DBI::dbDisconnect(con, shutdown=TRUE)
return(head)
}
###########################
## html widget functions ##
###########################
#' MultiVariantsViewer
#'
#' Takes a dataset (eg the output of extractData) and plots the data interactively. Data must
#' contain headers 'chrom', 'rsid', 'pos', 'ref', 'alt1', 'firth', 'obsct', 'testname', 'oddsr',
#' 'se', 'tstat', 'p', and 'phenoname'.
#'
#' @param dataset The dataset, the result of extractData, that will be plotted.
#' @param width Optional plot width specification.
#' @param width Optional plot height specification.
#' @param elementId Optional plot html element id.
#'
#' @import htmlwidgets
#'
#'
#' @export
MultiVariantViewer <- function(dataset, width = NULL, height = NULL, elementId = NULL) {
if( all(colnames(dataset) %in% c('chrom', 'rsid', 'pos','ref','alt1','firth','obsct','testname','oddsr','se','tstat','p','phenoname')) == FALSE ){
stop("Dataset must contain headers 'chrom', 'rsid', 'pos','ref','alt1','firth','obsct','testname','oddsr','se','tstat','p', and 'phenoname'")
}
# Create list of unique ids (keys) for each data point
keys <- paste(dataset$chrom, dataset$pos, dataset$rsid, dataset$phenoname, dataset$test, sep="_")
# Find max and min plot values
ORrange <- list( min = min(dataset$oddsr), max = max(dataset$oddsr) )
Prange <- list(min = min(-log10(dataset$p)), max = max(-log10(dataset$p)))
# Find unique rsids, phenotypes, and tests
rsids <- unique(dataset$rsid)
phenonames <- unique(dataset$phenoname)
tests <- unique(dataset$test)
# Get vector of colors to use for later graphing
if(length(phenonames) > length(tests)) {
colors <- rainbow( length(phenonames), v = 0.8)
} else {
colors <- rainbow( length(tests), v = 0.8)
}
# Take off opacity tag for browser compatability
colors <- substr(colors, 1,7)
data <- list(keys = keys, ids = dataset$rsid, chrom=dataset$chrom, pos=dataset$pos, pval= -log10(dataset$p),
oddsr=dataset$oddsr, obsct=dataset$obsct,
pheno=dataset$phenoname, test=dataset$testname, ORrange=ORrange, Prange=Prange, rsids=rsids,
phenonames= phenonames, tests=tests, colors = colors)
# create widget
htmlwidgets::createWidget(
name = 'MultiVariantViewer',
data,
width = width,
height = height,
package = 'MultiGWASTools',
elementId = elementId
)
}
#' SingleVariantViewer
#'
#' Takes the dataset for a single variant (eg the output of extractData) and plots the data interactively.
#' Data must contain headers 'chrom', 'rsid', 'pos', 'ref', 'alt1', 'firth', 'obsct', 'testname', 'oddsr',
#' 'se', 'tstat', 'p' and 'phenoname'.
#'
#' @param dataset The dataset, the results of extractData, that will be plotted.
#' @param width Optional plot width specification.
#' @param width Optional plot height specification.
#' @param elementId Optional plot html element id.
#'
#' @import htmlwidgets
#'
#' @export
SingleVariantViewer <- function(dataset, width = NULL, height = NULL, elementId = NULL ) {
if( all(colnames(dataset) %in% c('chrom', 'rsid', 'pos','ref','alt1','firth','obsct','testname','oddsr','se','tstat','p','phenoname')) == FALSE ){
stop("Dataset must contain headers 'chrom', 'rsid', 'pos','ref','alt1','firth','obsct','testname','oddsr','se','tstat','p', and 'phenoname'")
}
if(length(unique(dataset$rsid)) > 1) {
stop('This dataset contains more than one RSID. SingleVariantViewer should be used with only one RSID.')
}
# Create list of unique ids (keys) for each data point
keys <- paste(dataset$chrom, dataset$pos, dataset$rsid, dataset$phenoname, dataset$test, sep="_")
# Find max and min plot values
ORrange <- list( min = min(dataset$oddsr), max = max(dataset$oddsr) )
Prange <- list(min = min(-log10(dataset$p)), max = max(-log10(dataset$p)))
# Find unique rsids, phenotypes, and tests
rsids <- unique(dataset$rsid)
phenonames <- unique(dataset$phenoname)
tests <- unique(dataset$test)
# Get vector of colors to use for later graphing
if(length(phenonames) > length(tests)) {
colors <- rainbow( length(phenonames), v = 0.8)
} else {
colors <- rainbow( length(tests), v = 0.8)
}
# Take off opacity tag for browser compatability
colors <- substr(colors, 1,7)
data <- list(keys = keys, ids = dataset$rsid, chrom=dataset$chrom, pos=dataset$pos, pval= -log10(dataset$p),
oddsr=dataset$oddsr, obsct=dataset$obsct,
pheno=dataset$phenoname, test=dataset$testname, ORrange=ORrange, Prange=Prange, rsids=rsids,
phenonames= phenonames, tests=tests, colors = colors)
# create widget
htmlwidgets::createWidget(
name = 'SingleVariantViewer',
data,
width = width,
height = height,
package = 'MultiGWASTools',
elementId = elementId
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.