R/MsBackendHdf5Peaks-functions.R

Defines functions .h5_write_peaks .h5_read_peaks .hdf5_compression_level .h5_read_bare .valid_h5peaks_file .initialize_h5peaks_file .valid_h5files MsBackendHdf5Peaks .valid_ms_backend_mod_count

Documented in MsBackendHdf5Peaks

#' @include hidden_aliases.R
NULL

.valid_ms_backend_mod_count <- function(x, y) {
    if (length(x) != length(y))
        "Different number of source files and modification counters."
    else
        NULL
}

#' @rdname MsBackend
#'
#' @export MsBackendHdf5Peaks
MsBackendHdf5Peaks <- function() {
    if (!requireNamespace("rhdf5", quietly = TRUE))
        stop("The use of 'MsBackendHdf5Peaks' requires package 'rhdf5'. ",
             "Please install with 'BiocManager::install(\"rhdf5\")'")
    new("MsBackendHdf5Peaks")
}

#' @description
#'
#' - check that lengths of `x` and `files` matches.
#' - check that `x` contains unique names.
#' - check that `x` exist.
#' - check that `x` are in the correct format.
#'
#' @param x `character` with the file names of the Hdf5 files.
#'
#' @author Johannes Rainer
#'
#' @noRd
.valid_h5files <- function(x) {
    msg <- NULL
    if (length(x)) {
        if (length(x) != length(unique(x)))
            msg <- c(msg, paste0("No duplicated HDF5 files allowed"))
        if (!all(file.exists(x)))
            msg <- c(msg, paste0("File(s) ", paste(x[!file.exists(x)],
                                                   collapse = ", "),
                                 " do not exist"))
        msg <- c(msg, unlist(lapply(x, .valid_h5peaks_file), use.names = FALSE))
    }
    msg
}

#' @description
#'
#' Initializes a **new** hdf5 file.
#'
#' @noRd
.initialize_h5peaks_file <- function(x, modCount = 0L) {
    h5 <- rhdf5::H5Fcreate(x)
    on.exit(invisible(rhdf5::H5Fclose(h5)))
    comp_level <- .hdf5_compression_level()
    rhdf5::h5createGroup(h5, "header")
    rhdf5::h5write("Spectra::MsBackendHdf5Peaks", h5, "/header/class",
                   level = comp_level)
    rhdf5::h5write(modCount, h5, "/header/modcount", level = comp_level)
    rhdf5::h5createGroup(h5, "peaks")
}

.valid_h5peaks_file <- function(x) {
    fapl <- rhdf5::H5Pcreate("H5P_FILE_ACCESS")
    on.exit(rhdf5::H5Pclose(fapl))
    msg <- NULL
    fid <- try(.Call("_H5Fopen", x, 0L, fapl@ID, PACKAGE = "rhdf5"))
    if (is(fid, "try-error"))
        return(paste0("File ", x, " is not a Hdf5 file"))
    on.exit(invisible(.Call("_H5Fclose", fid, PACKAGE = "rhdf5")))
    cls <- try(.h5_read_bare(fid, "/header/class"))
    if (is(cls, "try-error"))
        msg <- "Wrong Hdf5 file format: element /header/class not found"
    else {
        if (cls != "Spectra::MsBackendHdf5Peaks")
            msg <- "Wrong Hdf5 file format: unexpected class"
    }
    modc <- try(.h5_read_bare(fid, "/header/modcount"))
    if (is(modc, "try-error")) {
        msg <- c(msg, paste0("Wrong Hdf5 file format: element /header/modcount",
                             " not found"))
    } else {
        if (!is.integer(modc))
            msg <- c(msg, paste0("Wrong Hdf5 file format: modification counter",
                                 "is not an integer"))
    }
    if (length(msg)) msg
    else NULL
}

#' This is based on Mike Smith's function (issue #395) that directly accesses
#' the data without validation and file checking.
#'
#' @param file the ID as returned by `_H5Fopen`.
#'
#' @param name `character` defining the data set to be read.
#'
#' @return the imported data set (in most cases a `matrix`).
#'
#' @author Mike Smith, Johannes Rainer
#'
#' @md
#'
#' @noRd
.h5_read_bare <- function(file, name = "") {
    did <- .Call("_H5Dopen", file, name, NULL, PACKAGE = "rhdf5")
    res <- .Call("_H5Dread", did, NULL, NULL, NULL, TRUE, 0L, FALSE, FALSE,
                 PACKAGE = "rhdf5")
    invisible(.Call("_H5Dclose", did, PACKAGE = "rhdf5"))
    res
}

.hdf5_compression_level <- function() {
    getOption("HDF5_COMPRESSION_LEVEL", 3L)
}

#' Read peak data from an hdf5 file and return a list of `matrix` objects with
#' columns `"mz"` and `"intensity"`.
#'
#' @param x `character`: the name of the HDF5 file.
#'
#' @param scanIndex `integer` with the scan indices/ids.
#'
#' @param modCount `integer`: the modification counter stored in the R object
#'     This is checked against one stored in the hdf5 file and an error is
#'     thrown if they differ, i.e. if the object was copied and the hdf5 files
#'     modified by the original/source object.
#'
#' @param columns `character` with the column names to return. Only values
#'     `"mz"` and `"intensity"` are currently supported (in any order).
#'
#' @return list of `matrix` objects with the peak data.
#'
#' @author Johannes Rainer
#'
#' @md
#'
#' @noRd
.h5_read_peaks <- function(x = character(), scanIndex = integer(),
                           modCount = 0L, columns = c("mz", "intensity")) {
    if (length(x) != 1)
        stop("'x' should have length 1")
    if (!length(scanIndex))
        return(list(matrix(ncol = 2, nrow = 0,
                           dimnames = list(character(), c("mz", "intensity")))))
    requireNamespace("rhdf5", quietly = TRUE)
    fapl <- rhdf5::H5Pcreate("H5P_FILE_ACCESS")
    on.exit(rhdf5::H5Pclose(fapl))
    fid <- .Call("_H5Fopen", x, 0L, fapl@ID, PACKAGE = "rhdf5")
    on.exit(invisible(.Call("_H5Fclose", fid, PACKAGE = "rhdf5")))
    h5modCount <- .h5_read_bare(fid, "/header/modcount")
    if (h5modCount != modCount)
        stop("The data in the hdf5 files associated with this object appear ",
             "to have changed! Please see the Notes section in ?MsBackend ",
             "for more information.")
    cn <- c("mz", "intensity")
    if (length(columns) == 2L && all(columns == c("mz", "intensity"))) {
        read_fun <- function(z, file, cn, columns, ...) {
            res <- .h5_read_bare(name = z, file = file)
            colnames(res) <- c("mz", "intensity")
            res
        }
    } else {
        read_fun <- function(z, file, cn, columns, ...) {
            res <- .h5_read_bare(name = z, file = file)
            colnames(res) <- c("mz", "intensity")
            res[, columns, drop = FALSE]
        }
    }
    base::lapply(paste0("/spectra/", scanIndex), read_fun, file = fid,
                 cn = cn, columns = columns)
}

#' Write spectra of an mzML file to a group within a hdf5 file. Depending on
#' the value of `prune` the hdf5 group will be deleted before writing the data.
#' This is required as datasets in a hdf5 file can not be *updated* if their
#' dimensions don't match.
#'
#' @param x `list` of `matrix` objects with columns `"mz"` and `"intensity"`.
#'
#' @param scanIndex `integer` with the scan index for the peaks of each
#'     spectrum.
#'
#' @param h5file `character(1)` with the name of the hdf5 file into which the
#'     data should be written.
#'
#' @param modCount `integer`, modification counter, incremented by one for each
#'     write
#'
#' @param prune `logical(1)` whether the group should be deleted before writing
#'     the data (see description above for more details).
#'
#' @author Johannes Rainer
#'
#' @md
#'
#' @noRd
.h5_write_peaks <- function(x = list(), scanIndex = integer(), h5file,
                              modCount = 0L, prune = TRUE) {
    if (length(x) != length(scanIndex))
        stop("lengths of 'x' and 'scanIndex' have to match")
    if (any(duplicated(scanIndex)))
        stop("no duplicated values in 'scanIndex' allowed")
    requireNamespace("rhdf5", quitely = TRUE)
    h5 <- rhdf5::H5Fopen(h5file)
    on.exit(invisible(rhdf5::H5Fclose(h5)))
    comp_level <- .hdf5_compression_level()
    x <- force(x)
    if (prune && rhdf5::H5Lexists(h5, "/spectra"))
        rhdf5::h5delete(h5, "/spectra")
    if (!rhdf5::H5Lexists(h5, "/spectra"))
        rhdf5::h5createGroup(h5, "/spectra")
    spids <- paste0("/spectra/", scanIndex)
    for (i in seq_along(x)) {
        xi <- x[[i]]
        if (!is.matrix(xi))
            stop("Peak data has to be provided as a matrix but I got ",
                 class(xi))
        if (ncol(xi) != 2 || !all(c("mz", "intensity") %in% colnames(xi)))
            stop("A peak matrix should have two columns named \"mz\" and ",
                 "\"intensity\"")
        if (is.unsorted(xi[, 1]))
            stop("m/z values have to be ordered")
        rhdf5::h5write(xi, h5, name = spids[i], level = comp_level)
    }
    rhdf5::h5write(modCount, h5, "/header/modcount", level = comp_level)
}
rformassspectrometry/Spectra documentation built on Oct. 30, 2024, 5:42 a.m.