#' @rdname MsBackendMassbankSql
#'
#' @export MsBackendMassbankSql
MsBackendMassbankSql <- function() {
if (!requireNamespace("DBI", quietly = TRUE))
stop("'MsBackendMassbankSql' requires package 'DBI'. Please ",
"install with 'install.packages(\"DBI\")'")
new("MsBackendMassbankSql")
}
#' @importFrom DBI dbListTables
#'
#' @noRd
.valid_dbcon <- function(x) {
if (length(x)) {
if (!inherits(x, "DBIConnection"))
return("'dbcon' is expected to be a connection to a database")
tables <- dbListTables(x)
if (!all(c("msms_spectrum", "msms_spectrum_peak") %in% tables))
return(paste0("Database lacks some required tables. Is 'dbcon' a",
" connection to a MassBank database?"))
}
NULL
}
#' Returns the spectra data, from the database and eventually filling with
#' *core* spectra variables, if they are not available in the database.
#'
#' The data can be either:
#' - in the database.
#' - in the local data (if new variables were added with $name <-).
#' - core spectra variables - if they are not in the database they have to be
#' initialized with `NA` and the correct data type.
#'
#' @return a `data.frame` - always, even if only with a single column.
#'
#' @importFrom IRanges NumericList CharacterList
#'
#' @importFrom S4Vectors extractCOLS
#'
#' @importFrom S4Vectors make_zero_col_DFrame
#'
#' @importFrom methods as callNextMethod getMethod
#'
#' @author Johannes Rainer
#'
#' @noRd
.spectra_data_massbank_sql <- function(x, columns = spectraVariables(x)) {
res <- getMethod("spectraData", "MsBackendCached")(x, columns = columns)
if (is.null(res))
res <- make_zero_col_DFrame(length(x))
## Define what needs to be still retrieved.
db_cols <- intersect(columns, x@spectraVariables)
db_cols <- db_cols[!db_cols %in% c("mz", "intensity", colnames(res))]
mz_cols <- intersect(columns, c("mz", "intensity"))
if (length(db_cols)) {
res <- cbind(
res, as(.fetch_spectra_data_sql(x, columns = db_cols), "DataFrame"))
if (any(colnames(res) == "synonym"))
res$synonym <- CharacterList(res$synonym, compress = FALSE)
}
## Get m/z and intensity values
if (length(mz_cols)) {
pks <- .fetch_peaks_sql(x, columns = mz_cols)
f <- factor(pks$spectrum_id)
if (any(mz_cols == "mz")) {
mzs <- unname(split(pks$mz, f)[as.character(x@spectraIds)])
res$mz <- NumericList(mzs, compress = FALSE)
}
if (any(mz_cols == "intensity")) {
ints <- unname(
split(pks$intensity, f)[as.character(x@spectraIds)])
res$intensity <- NumericList(ints, compress = FALSE)
}
}
if (!all(columns %in% colnames(res)))
stop("Column(s) ", paste0(columns[!columns %in% names(res)],
collapse = ", "), " not available.",
call. = FALSE)
extractCOLS(res, columns)
}
#' @importFrom DBI dbSendQuery dbBind dbFetch dbClearResult
#'
#' @noRd
.fetch_peaks_sql <- function(x, columns = c("mz", "intensity")) {
if (length(x@dbcon)) {
dbGetQuery(
x@dbcon,
paste0("select spectrum_id,", paste(columns, collapse = ","),
" from msms_spectrum_peak where spectrum_id in (",
paste0("'", unique(x@spectraIds), "'", collapse = ","),")"))
} else {
res <- data.frame(character(), lapply(columns, function(z) numeric()))
colnames(res) <- c("spectrum_id", columns)
res
}
}
## #' This function ensures that m/z values are ALWAYS returned ordered! Note that
## #' this is slightly faster than including `order by mz` in the SQL query.
## #'
## #' Note however that splitting the data.frame later is slower if the full data
## #' frame is ordered by m/z.
## #'
## #' @noRd
## .fetch_peaks_sql_mz_order <- function(x, columns = c("mz", "intensity")) {
## if (length(x@dbcon)) {
## res <- dbGetQuery(
## x@dbcon,
## paste0("select spectrum_id,", paste(columns, collapse = ","),
## " from msms_spectrum_peak where spectrum_id in (",
## paste0("'", unique(x@spectraIds), "'", collapse = ","),")"))
## res[order(res$mz), , drop = FALSE]
## } else {
## res <- data.frame(character(), lapply(columns, function(z) numeric()))
## colnames(res) <- c("spectrum_id", columns)
## res
## }
## }
.columns_sql <- c(
precursorIntensity = "precursor_intensity",
precursorMz = "precursor_mz_text",
msLevel = "ms_level",
compound_id = "msms_spectrum.compound_id"
)
.map_spectraVariables_to_sql <- function(x) {
for (i in seq_along(.columns_sql))
x <- sub(names(.columns_sql)[i], .columns_sql[i], x, fixed = TRUE)
x
}
.map_sql_to_spectraVariables <- function(x) {
for (i in seq_along(.columns_sql))
x <- sub(.columns_sql[i], names(.columns_sql[i]), x, fixed = TRUE)
x
}
#' Simple helper that creates a join query depending on the provided columns.
#'
#' @param x `Spectra`.
#'
#' @param columns `character` with the column names.
#'
#' @noRd
.join_query <- function(x, columns) {
res <- "msms_spectrum"
if (any(columns %in% x@.tables$ms_compound))
res <- paste0(res, " join ms_compound on (msms_spectrum.compound_id",
"=ms_compound.compound_id)")
res
}
.fetch_spectra_data_sql <- function(x, columns = c("spectrum_id")) {
orig_columns <- columns
if (any(columns %in% c("compound_name", "synonym"))) {
columns <- columns[!columns %in% c("compound_name", "synonym")]
columns <- unique(c(columns, "compound_id"))
}
sql_columns <-
unique(c("spectrum_id", .map_spectraVariables_to_sql(columns)))
## That turns out to be faster than dbBind if we use a field in the
## database that is unique (such as spectrum_id).
res <- dbGetQuery(
x@dbcon,
paste0("select ", paste(sql_columns, collapse = ","), " from ",
.join_query(x, sql_columns), " where spectrum_id in (",
paste0("'", unique(x@spectraIds), "'", collapse = ", ") ,")"))
idx <- match(x@spectraIds, res$spectrum_id)
res <- res[idx[!is.na(idx)], , drop = FALSE]
rownames(res) <- NULL
if (any(columns == "msLevel")) {
res$msLevel <- as.integer(sub("MS", "", res$ms_level))
res$ms_level <- NULL
}
if (any(columns == "polarity")) {
pol <- rep(NA_integer_, nrow(res))
pol[res$polarity == "POSITIVE"] <- 1L
pol[res$polarity == "NEGATIVE"] <- 0L
res$polarity <- pol
}
if (any(columns == "publication"))
res$dataOrigin <- res$publication
if (any(columns == "precursorIntensity")) {
res$precursorIntensity <- as.numeric(res$precursor_intensity)
res$precursor_intensity <- NULL
}
## So far we're not dealing with multiple precursor m/z here!
if (any(columns == "precursorMz")) {
suppressWarnings(
res$precursorMz <- as.numeric(res$precursor_mz_text))
if (!any(columns == "precursor_mz_text"))
res$precursor_intensity_text <- NULL
}
if (any(columns == "collisionEnergy")) {
suppressWarnings(
res$collisionEnergy <- as.numeric(res$collision_energy_text))
res$collision_energy_text <- NULL
}
## manage synonym and compound_name. Need a second query for that.
if (any(orig_columns %in% c("synonym", "compound_name"))) {
cmps <- dbGetQuery(
x@dbcon,
paste0("select * from synonym where compound_id in (",
paste0("'", unique(res$compound_id), "'",
collapse = ","), ")"))
cmpl <- split(
cmps$synonym,
as.factor(cmps$compound_id))[as.character(res$compound_id)]
if (any(orig_columns == "compound_name"))
res$compound_name <- vapply(cmpl, function(z) z[1], character(1))
if (any(orig_columns == "synonym"))
res$synonym <- cmpl
}
res[, orig_columns, drop = FALSE]
}
.compounds_sql <- function(x, id, columns = "*") {
id <- force(id)
qry <- dbSendQuery(
x, paste0("select ", columns, " from ms_compound join synonym on (",
"ms_compound.compound_id=synonym.compound_id)",
" where ms_compound.compound_id = ?"))
qry <- dbBind(qry, list(id))
res <- dbFetch(qry)
dbClearResult(qry)
idx <- grep("^compound_id", colnames(res))
if (length(idx) > 1)
res <- res[, -idx[-1]]
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.