R/mz_summary.R

Defines functions mz_vis mz_summary

Documented in mz_summary mz_vis

#' @name mz_summary
#'
#' @title Create a summary from adjacency list containing mass differences
#'
#' @description
#' The function `mz_summary` creates a summary from the `AdjacencyMatrix`, 
#' containing mass differences. Individual mass differences are counted over 
#' all features. The input may be an `AdjacencyMatrix` object originating from 
#' the function `structural`, or `combine`. The parameter `filter` will define 
#' if data will be filtered above a certain threshold or not. 
#' 
#' @param am
#' `AdjacencyMatrix` object containing the mass differences, that has 
#' previously been generated by the function `structural` or `combine`
#' 
#' @param var
#' `character` vector corresponding to `assayNames(am)`, the counts will be 
#' grouped according to `var`
#' 
#' @param filter
#' `numeric`, leave empty or set to `0` if unfiltered data are 
#' required. Select a `numeric` as a threshold on counts of mz differences.
#' May be useful to visualize big data. 
#' 
#' @details
#' Summarizes the adjacency matrices containing mass difference values, 
#' i.e. an `AdjacencyMatrix` from either `structural` or `combine` may be used.
#' The default is `filter = 0`, so the unfiltered summary will be returned. 
#' If `filter` is set to a number, e.g. 1000, only mz differences with a 
#' count at or above this threshold will be returned. 
#' 
#' The function can be applied for adjacency lists from `structural` and 
#' `combine`.
#' 
#' @return 
#' `data.frame` containing the numbers of present mz differences and
#' corresponding name. 
#'
#' @author Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} and
#' Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet") 
#' transformation <- rbind(
#'     c("Monosaccharide (-H2O)", "C6H10O5", "162.0528234315"),
#'     c("Disaccharide (-H2O)", "C12H20O11", "340.1005614851"),
#'     c("Trisaccharide (-H2O)", "C18H30O15", "486.1584702945"))
#' transformation <- data.frame(group = transformation[, 1],
#'                                 formula = transformation[, 2],
#'                                 mass = as.numeric(transformation[, 3]))
#' am_struct <- structural(x_test, transformation, ppm = 5, 
#'     var = c("group", "mass", "formula"), directed = TRUE)
#' # unfiltered mz difference counts
#' mz_summary(am_struct)
#' # filtered mz difference counts 
#' mz_summary(am_struct, filter = 2)
#'
#' @export
#'
#' @importFrom stats na.omit
#' @importFrom rlang .data
mz_summary <- function(am, var = c("group", "formula"), filter = 0) {

    ## 'am' has to be an AdjacencyMatrix of type 'structural' or 'combine'
    if (!is(am, "AdjacencyMatrix"))
        stop("'am' is not an 'AdjacencyMatrix' object")

    ## validObject() signals its own error when 'am' is invalid, no extra
    ## stop() is needed here
    validObject(am)

    if (!(am@type %in% c("combine", "structural")))
        stop("'am' is not of type 'structural' or 'combine'")

    ## check the types of var and filter before querying the assays of am
    if (!is.character(var) || length(var) == 0)
        stop("'var' has to be a character of length > 0")

    if (!is.numeric(filter))
        stop("'filter' needs to be numeric")

    ## a vector or NA threshold cannot be used in the scalar comparisons below
    if (length(filter) != 1 || is.na(filter))
        stop("'filter' needs to be a single non-missing numeric")

    if (filter < 0)
        stop("'filter' needs to be 0 or positive numeric")

    ## check for integrity of var: every entry has to name an assay of am
    var_err <- var[!var %in% SummarizedExperiment::assayNames(am)]
    if (length(var_err) > 0)
        stop(sprintf("assay '%s' not in 'am'",
            paste(var_err, collapse = "', '")))

    am_df <- as.data.frame(am)

    ## column flagging the mass differences: 'combine_binary' for type
    ## 'combine', 'binary' for type 'structural'
    binary_col <- if (am@type == "combine") "combine_binary" else "binary"

    if (!1 %in% am_df[[binary_col]])
        stop(sprintf("assay '%s' does not contain any mass differences",
            binary_col))

    ## which() skips NA entries, a plain logical index would insert all-NA
    ## rows for them that are later counted as a group of their own
    am_df <- am_df[which(am_df[[binary_col]] == 1), , drop = FALSE]

    ## for type 'combine' the rows with NA in any column are removed
    ## (na.omit works on complete rows, it cannot be limited to one column)
    if (am@type == "combine")
        am_df <- stats::na.omit(am_df)

    ## count the rows per combination of the columns given in var
    df <- am_df |>
        dplyr::count(dplyr::across(dplyr::all_of(var)), name = "count") |>
        as.data.frame()

    ## apply some filtering, only return those transformations that pass filter
    df[df$count >= filter, , drop = FALSE]
}

#' @name mz_vis
#'
#' @title Visualize mass difference distribution 
#'
#' @description
#' The function `mz_vis` visualizes the mass difference distribution,
#' which has been summarized by `mz_summary`. 
#' 
#' @param df
#' `data.frame`, previously generated by `mz_summary`. Needs to contain
#' the column "count" and the column named by `var`.
#' 
#' @param var
#' `character(1)`, the column in `df` to visualize on the y-axis
#' 
#' @details
#' Plots the mass difference distribution, summarized
#' by `mz_summary`. 
#' Visualization is performed using ggplot2
#' 
#' @return 
#' `ggplot` object and corresponding barplot for visualizations
#'
#' @author Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} and
#' Thomas Naake, \email{thomasnaake@@googlemail.com}
#'
#' @examples
#' data("x_test", package = "MetNet")
#' transformation <- rbind(
#'     c("Monosaccharide (-H2O)", "C6H10O5", "162.0528234315"),
#'     c("Disaccharide (-H2O)", "C12H20O11", "340.1005614851"),
#'     c("Trisaccharide (-H2O)", "C18H30O15", "486.1584702945"))
#' transformation <- data.frame(group = transformation[, 1],
#'                                 formula = transformation[, 2],
#'                                 mass = as.numeric(transformation[, 3]))
#' am_struct <- structural(x_test, transformation, 
#'     var = c("group", "formula", "mass"), ppm = 5, directed = TRUE)
#' mz_sum <- mz_summary(am_struct, var = "group")
#' mz_vis(mz_sum)
#' 
#' @importFrom ggplot2 ggplot aes_string geom_bar theme_minimal coord_flip labs
#' @importFrom ggplot2 theme element_text sym
#' 
#' @export
mz_vis <- function(df, var = "group") {

    ## df has to be a data.frame holding the counts
    if (!is.data.frame(df)) {
        stop("'df' is not a data.frame")
    }
    if (!("count" %in% colnames(df))) {
        stop("'df' does not contain the column 'count'")
    }

    ## var has to be a single character naming a column of df
    if (!(is.character(var) && length(var) == 1)) {
        stop("'var' has to be a character vector of length 1")
    }
    if (!(var %in% colnames(df))) {
        stop(sprintf("'df' does not contain the column '%s'", var))
    }

    ## map the column given by var to x and the counts to y
    x_col <- ggplot2::sym(var)
    y_col <- ggplot2::sym("count")
    mapping <- ggplot2::aes(x = !!x_col, y = !!y_col)

    ## bars take their height directly from the count column
    bars <- ggplot2::ggplot(df, mapping) +
        ggplot2::geom_bar(stat = "identity")

    ## add theme, flipped coordinates, title and text settings
    bars +
        ggplot2::theme_minimal() +
        ggplot2::coord_flip() +
        ggplot2::labs(title = "Numbers of determined mass differences") +
        ggplot2::theme(text = ggplot2::element_text(family = "sans", size = 12))
}
tnaake/MetNet documentation built on Nov. 4, 2024, 4:45 a.m.