R/distancematrix_mds.R

Defines functions MDSplot distanceMatrix

Documented in distanceMatrix MDSplot

#' Create distance matrix from list of spectra
#'
#' \code{distanceMatrix()} creates a distance matrix from a list of MS2
#' spectra, MS1 pseudospectra or neutral loss patterns by pairwise comparison
#' using the specified distance function. This distance matrix is the basis for
#' CluMSID's data mining functions.
#'
#' @param speclist A list of \code{\linkS4class{MS2spectrum}} or
#'   \code{\linkS4class{pseudospectrum}} objects as generated by
#'   \code{\link{extractMS2spectra}} or \code{\link{extractPseudospectra}}.
#'   Lists with fewer than two elements are accepted and yield a 0 x 0 or
#'   1 x 1 matrix.
#'
#' @param distFun The distance function to be used. At the moment, only
#'   \code{\link{cossim}} is implemented; any other value raises an error.
#'
#' @param type \code{"spectrum"} (default) for MS2 spectra or MS1 pseudospectra
#'   or \code{"neutral_losses"} for neutral loss patterns.
#'
#' @param mz_tolerance The \emph{m/z} tolerance to be used for merging, default
#'   is \code{1e-5}, i.e. +/- 10ppm. If the mass-to-charge ratios of two peaks
#'   differ less than \emph{mz_tolerance}, they are assumed to have the same
#'   \emph{m/z}.
#'
#' @return A numeric \code{length(speclist)} by \code{length(speclist)} matrix
#'   containing pairwise distances (1 - similarity) between all features in
#'   \code{speclist}. Distances that evaluate to \code{NA} are set to \code{1}.
#'   Row and column names are taken from the \code{id} slot
#'   or, if present, pasted from the \code{id} and \code{annotation} slots of
#'   the \code{\linkS4class{MS2spectrum}} or
#'   \code{\linkS4class{pseudospectrum}} objects.
#'
#' @examples
#' load(file = system.file("extdata",
#'     "annotatedSpeclist.RData",
#'     package = "CluMSIDdata"))
#'
#' distanceMatrix(annotatedSpeclist[1:20])
#'
#' @importFrom S4Vectors isEmpty
#' @importFrom utils combn
#'
#' @export
distanceMatrix <- function(speclist, distFun = "cossim",
                            type = c("spectrum", "neutral_losses"),
                            mz_tolerance = 1e-5){
    # Fail early with a clear message; previously an unsupported 'distFun'
    # only surfaced later as "object 'distmat' not found".
    if(!(is.character(distFun) && length(distFun) == 1L &&
            distFun %in% "cossim")){
        stop("Unsupported 'distFun': only \"cossim\" is implemented.",
                call. = FALSE)
    }
    type <- match.arg(type)

    # Distance between two spectra: 1 - cosine similarity
    specdist <- function(a, b){
        1-cossim(a, b, type = type, mzTolerance = mz_tolerance)
    }

    n <- length(speclist)
    distmat <- matrix(NA_real_, nrow = n, ncol = n)

    # combn() cannot build pairs from fewer than two elements
    if(n > 1){
        dists <- vapply(
            X = utils::combn(speclist, 2, simplify = FALSE),
            FUN = function(x) specdist(x[[1]], x[[2]]),
            FUN.VALUE = numeric(1)
        )
        # combn() enumerates the pairs in the same order in which
        # lower.tri() walks the matrix column by column
        distmat[lower.tri(distmat)] <- dists
        # mirror the lower triangle to obtain a symmetric matrix
        distmat[upper.tri(distmat)] <- t(distmat)[upper.tri(distmat)]
    }

    # The diagonal is computed rather than set to 0 so that a feature whose
    # self-comparison yields NA ends up with distance 1 below
    diag(distmat) <- vapply(X = speclist,
                            FUN = function(x) specdist(x, x),
                            FUN.VALUE = numeric(1))

    # Label features by id, or "id - annotation" when an annotation is present
    featnames <- vapply(
        X = speclist,
        FUN = function(e) {
            if (S4Vectors::isEmpty(e@annotation) || e@annotation == "") {
                return(as.character(e@id))
            } else return(paste(e@id, e@annotation, sep = " - "))
        },
        FUN.VALUE = character(1)
    )
    dimnames(distmat) <- list(featnames, featnames)

    # Undefined similarities are treated as maximal distance
    distmat[is.na(distmat)] <- 1
    return(distmat)
}

#' Multidimensional scaling of spectral similarity data
#'
#' \code{MDSplot()} is used to generate multidimensional scaling plots from
#' spectral similarity data. An interactive visualisation can be produced using
#' \pkg{plotly}.
#'
#' @param distmat A distance matrix as generated by
#'   \code{\link{distanceMatrix}}, or a \code{dist} object. If it carries no
#'   row names/labels, points are labelled by their index.
#'
#' @param interactive Logical, defaults to \code{FALSE}. If \code{TRUE}, an
#'   interactive visualisation is generated using \pkg{plotly}.
#'
#' @param highlight_annotated Logical, defaults to \code{FALSE}.
#'   If \code{TRUE}, points for features for which an annotation was added
#'   before using \code{\link{distanceMatrix}} are highlighted by red colour,
#'   while other points are grey in the MDS plot. Annotated features are
#'   recognised by the \code{" - "} separator in their label. This setting
#'   replaces any \code{colour} supplied via \code{...}.
#'
#' @param ... Additional arguments passed to \code{geom_point()},
#'   e.g. \code{pch}, \code{size} or \code{alpha}. Defaults of \code{pch = 16},
#'   \code{size = 2} and \code{alpha = 0.5} are used when not supplied.
#'
#' @return An MDS plot generated with the help of
#'   \code{\link[stats]{cmdscale}},
#'   \code{\link[ggplot2]{ggplot}} and, if interactive,
#'   \code{\link[plotly]{ggplotly}}.
#'
#' @importFrom methods is
#' @importFrom stats cmdscale as.dist
#' @importFrom plotly ggplotly
#'
#' @import ggplot2
#'
#' @examples
#' load(file = system.file("extdata",
#'     "distmat.RData",
#'     package = "CluMSIDdata"))
#'
#' MDSplot(distmat, highlight_annotated = TRUE)
#'
#' @export
MDSplot <- function(distmat,
                    interactive = FALSE,
                    highlight_annotated = FALSE,
                    ...){
    if(!methods::is(distmat, "dist")) distmat <- stats::as.dist(distmat)
    coords <- stats::cmdscale(distmat, k = 2)

    # Unlabelled input gives coordinates without row names; fall back to
    # index labels so that the 'anno' column always exists (otherwise
    # data.frame() drops it and the highlighting colours have length zero)
    labels <- row.names(coords)
    if(is.null(labels)) labels <- as.character(seq_len(nrow(coords)))

    fitx <- fity <- anno <- NULL #only to appease CRAN check
    fit <- data.frame(fitx = coords[,1], fity = coords[,2], anno = labels)

    # Fill in default point appearance for anything not given via '...'
    params <- list(...)
    defaults <- list(pch = 16, size = 2, alpha = 0.5)
    for(nm in setdiff(names(defaults), names(params))){
        params[[nm]] <- defaults[[nm]]
    }
    if(highlight_annotated){
        # colour index 2 for annotated features ("id - annotation" labels),
        # colour index 1 for all others
        params$colour <- as.numeric(
            grepl(pattern = " - ",
                    x = fit$anno))+1
    }

    # The 'text' aesthetic is only used for the plotly tooltip
    q <- ggplot2::ggplot(fit, ggplot2::aes( x = fitx,
                                            y = fity,
                                            text = anno)) +
        do.call(ggplot2::geom_point, args = params) +
        ggplot2::xlab("Coordinate 1") +
        ggplot2::ylab("Coordinate 2") +
        ggplot2::theme_light()

    if(interactive){
        suppressMessages(p <- plotly::ggplotly(q, tooltip = "text"))
        return(p)
    }
    return(q)
}

Try the CluMSID package in your browser

Any scripts or data that you put into this service are public.

CluMSID documentation built on Nov. 8, 2020, 7:46 p.m.