#' Read cfm-predict standard output into R object
#'
#' @param rst Standard out from a completed system2 call to cfm-predict
#'
#' @return List containing predicted spectra (and annotations, if present)
#' @export
#'
cfm_predict_stdout <- function(rst){
rst_break <- which(sapply(rst, nchar) == 0)
spec <- rst[1:(rst_break-1)]
spec_f <- cumsum(as.numeric(grepl('energy', spec)))
spec_breaks <- !grepl('energy', spec) & spec != ""
spec_split <- stats::setNames(
split(spec[spec_breaks], spec_f[spec_breaks]),
spec[grepl('energy', spec)])
spec_out <- lapply(spec_split, function(x){
do.call('rbind', lapply(x, function(xx) {
y <- strsplit(xx, ' ')
data.frame(matrix(
c(y[[1]][1:2]),
ncol = 2,
byrow = T,
dimnames = list(c(), c('mz', 'int'))
),
stringsAsFactors = F)
}))
})
if (rst_break == length(rst)) {
out <- spec_out
} else {
spec_ann <- lapply(spec_split, function(x) {
sapply(strsplit(x, ' '), function(xx) {
paste0(xx[3:length(xx)], collapse = ' ')
})
})
annotation <-
data.frame(matrix(do.call('rbind', strsplit(rst[(rst_break + 1):length(rst)], ' ')),
ncol = 3,
dimnames = list(c(), c('idx', 'mz', 'smiles'))), stringsAsFactors = F)
out <-
list(
spec = spec_out,
spec_ann = spec_ann,
annotation = annotation
)
}
return(out)
}
#' Read cfm_predict results from output_filename
#'
#' @param output_filename A text file containing the output from cfm_predict.
#'
#' @return List containing predicted spectra (and annotations, if present)
#' @export
#'
cfm_predict_readfile <- function(output_filename) {
rst <-
utils::read.delim(
file = output_filename,
header = F,
check.names = F,
stringsAsFactors = F
)[, 1]
rst <- append(rst, "", after = which(grepl('^0', rst)) - 1)
cfm_predict_stdout(rst = rst)
}
#' Read results of batch CFM prediction
#'
#' @param out_dir A path to a directory containing text files generated by
#' \code{cfm_predict_batch}
#' @param table_name A name to be used for the resultant database table
#' @param output_path A path to a SQLite database file for writing the results
#'
#' @return Writes table to sql lite database file designated by output_path
#' @export
#'
cfm_read_batch <-
function(out_dir = NULL,
table_name = NULL,
output_path = NULL) {
stopifnot(exprs = {
!is.null(out_dir)
!is.null(table_name)
!is.null(output_path)
})
tmp <-
dplyr::tibble(filename = fs::dir_ls(out_dir, glob = '*.txt')) %>%
dplyr::mutate(ID = gsub(".txt", "", x = basename(filename))) %>%
dplyr::rowwise(.) %>%
dplyr::mutate(spectrum = blob::as_blob(readBin(filename, 'raw', 1e6))) %>%
dplyr::ungroup(.)
if (!is.null(output_path)) {
conn <- DBI::dbConnect(RSQLite::SQLite(), output_path)
on.exit(DBI::dbDisconnect(conn))
DBI::dbWriteTable(conn, table_name, tmp)
} else{
return(tmp)
}
}
#' Read CFM precomputed spectra from a database created by cfm_read_batch
#'
#' @param db_file [string] Full path to the database file
#' @param table_name [string] Name of table containing the CFM spectra to read
#' @param ID [string] A vector of ID(s) to retrieve from the database
#' @param return_annotation [logical] Should spectra be returned with annotations
#'
#' @return A named list of spectra when selected table contains a column named
#' 'spectrum'. Otherwise a tibble.
#' @export
#'
cfm_read_db <-
function(db_file = NULL,
table_name = NULL,
ID = NULL,
return_annotation = F) {
db_file <- normalizePath(db_file, mustWork = T)
stopifnot(!is.null(table_name))
conn <- DBI::dbConnect(RSQLite::SQLite(), db_file)
on.exit(DBI::dbDisconnect(conn))
stopifnot(table_name %in% DBI::dbListTables(conn))
if (!is.null(ID)) {
id_match <- dplyr::tbl(conn, table_name) %>%
dplyr::filter(ID %in% !!ID)
if(dplyr::count(id_match) %>% dplyr::pull() == 0){
cat("No predicted spectra found for ID(s):\n", ID)
return(NULL)
}
a <- dplyr::as_tibble(id_match)
} else {
a <- dplyr::tbl(conn, table_name) %>%
dplyr::as_tibble()
}
if ('spectrum' %in% colnames(a)) {
b <- purrr::map(
a$spectrum,
cfm_parse_spec,
return_annotation = return_annotation
)
out <- rlang::set_names(b, a$ID)
} else {
out <- a
}
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.