#' Metabolite Candidates
#'
#' Post processing after obtaining the list of potential metabolite candidates.
#' The candidates can be filtered by biofluid and by adjusted p-value, and
#' "Unknown" metabolites can be removed. After that, the results are plotted
#' using a word cloud restricted to the `max_words` candidates with the lowest
#' adjusted p-value. The lower the p-value, the larger the font size of the
#' metabolite. Additionally, a bar plot shows the number of metabolite
#' candidates associated to each spectrum.
#'
#' @param path_result Path to the .csv file where the annotated table of
#'   metabolites is stored. The table must contain the columns `spectra`,
#'   `rt`, `Name`, `Biofluid` and `p.adj`. `Biofluid` may list several
#'   biofluids separated by `";"`.
#' @param biofluid_type Filters the results by biofluid type. Either `"Any"`
#'   (no filtering) or one of the biofluid names found in the table.
#' @param significance Filters the results by significance: only candidates
#'   with an adjusted p-value lower than or equal to this value are kept. A
#'   single number in the range \[0, 1\], or `NULL` to disable the filter.
#' @param metabolite_rm Logical. Remove "Unknown" metabolites.
#' @param max_words Maximum number of words allowed in the word cloud.
#' @return A list with two plots: `cloud`, a word cloud with the potential
#'   identified metabolites, and `freq`, a bar plot with the number of
#'   potential candidates by detected compound.
#' @family data visualization functions
#' @export
#' @examples
#' \dontrun{
#' path_result <- system.file("extdata",
#'                            "results_project",
#'                            "tables",
#'                            "metaboliteTable.csv",
#'                            package = "AlpsLCMS")
#' plots <- lcms_plot_metabolites(path_result, biofluid_type = "Any",
#'                                significance = 0.05,
#'                                metabolite_rm = FALSE,
#'                                max_words = 250)
#' plots$cloud
#'
#' plots$freq
#' }
lcms_plot_metabolites <- function(path_result, biofluid_type = "Any",
                                  significance = 0.05,
                                  metabolite_rm = TRUE,
                                  max_words = 250) {
  # Column names used through non-standard evaluation are bound to NULL so
  # that R CMD check does not report them as undefined global variables.
  spectra <- NULL
  rt <- NULL
  Name <- NULL
  p.adj <- NULL
  Metabolite <- NULL
  Biofluid <- NULL
  Retention_Time <- NULL
  Pvalue_Adj <- NULL
  Angle <- NULL
  Spectra <- NULL
  Selected_Biofluid <- NULL
  n <- NULL

  # Argument checks that do not need the data run first, so that a wrong
  # call fails before any file is read.
  if (!is.logical(metabolite_rm)) {
    stop("The variable Metabolite_rm must be of class logical")
  }
  if (length(metabolite_rm) != 1 || is.na(metabolite_rm)) {
    stop("The variable Metabolite_rm must be a single TRUE or FALSE value")
  }
  if (!is.character(biofluid_type)) {
    stop("The variable Biofluid_type must be of class character")
  }
  if (length(biofluid_type) != 1 || is.na(biofluid_type)) {
    stop("The variable Biofluid_type must be a single character string")
  }
  if (!is.null(significance)) {
    if (!is.numeric(significance)) {
      stop("Significance must be a variable with class either numeric or NULL")
    }
    if (length(significance) != 1 || is.na(significance) ||
        significance > 1 || significance < 0) {
      stop("Significance must be set to a value in the range [0, 1]")
    }
  }

  # Splits each entry of a ";"-separated biofluid column into its individual
  # biofluid names (one character vector per row), dropping the single space
  # that follows each separator.
  split_biofluids <- function(x) {
    lapply(stringr::str_split(x, ";"),
           stringr::str_replace, pattern = "^ ", replacement = "")
  }

  # Load Measured Data. Only the columns needed below are read; parsing
  # warnings raised by readr are silenced.
  suppressWarnings(Measured <- readr::read_csv(
    file = path_result,
    col_names = TRUE,
    col_types = readr::cols_only(spectra = readr::col_number(),
                                 rt = readr::col_double(),
                                 Name = readr::col_factor(),
                                 Biofluid = readr::col_character(),
                                 p.adj = readr::col_double()),
    skip_empty_rows = TRUE))
  attr(Measured, "spec") <- NULL

  # Fail with an explicit message when the table lacks a required column,
  # instead of an obscure error further down the pipeline.
  required_cols <- c("spectra", "rt", "Name", "Biofluid", "p.adj")
  missing_cols <- setdiff(required_cols, colnames(Measured))
  if (length(missing_cols) > 0) {
    stop(paste0("The metabolite table is missing the following columns: ",
                paste(missing_cols, collapse = ", "), "."))
  }

  # The selected biofluid must be "Any" or one of the names in the table.
  Biofluid_names <- base::sort(base::unique(base::unlist(
    split_biofluids(Measured$Biofluid)
  )))
  if (biofluid_type != "Any" && !(biofluid_type %in% Biofluid_names)) {
    stop(paste0("Your selected Biofluid is not among the following:",
                "\n", stringr::str_c(Biofluid_names, collapse = ", "), "."))
  }

  # Modify Column_names and drop na
  Measured <- Measured %>%
    dplyr::rename(Spectra = spectra,
                  Retention_Time = rt,
                  Metabolite = Name,
                  Pvalue_Adj = p.adj) %>%
    tidyr::drop_na()

  # Remove unknown Metabolites if needed
  if (metabolite_rm) {
    Measured <- Measured %>%
      dplyr::filter(Metabolite != "Unknown")
  }

  # Filter by Biofluid. Rows are kept when the selected biofluid is exactly
  # one of the names listed for that row. Matching whole names (rather than
  # using the selection as a regular expression) keeps names containing
  # special characters such as parentheses working, and avoids matching
  # biofluids that merely contain the selection as a substring.
  if (biofluid_type != "Any") {
    Measured <- Measured %>%
      dplyr::filter(vapply(split_biofluids(Biofluid),
                           function(tokens) biofluid_type %in% tokens,
                           logical(1)))
  }
  Measured <- Measured %>%
    dplyr::mutate(Selected_Biofluid = biofluid_type)
  max_area_wordcloud <- 11

  # Select and arrange the data
  Measured <- Measured %>%
    dplyr::select(Retention_Time, Spectra, Metabolite, Selected_Biofluid,
                  Biofluid, dplyr::everything()) %>%
    dplyr::arrange(Retention_Time, Spectra)

  # Filter by p-value
  if (!is.null(significance)) {
    Measured <- Measured %>%
      dplyr::filter(Pvalue_Adj <= significance)
  }

  # plots
  # word_cloud: words are drawn without rotation (Angle = 0) and limited to
  # the max_words candidates with the largest -log10(p-value); ties at the
  # cut-off are all kept by top_n().
  plot_df <- Measured %>%
    dplyr::select(Retention_Time, Spectra, Metabolite, Pvalue_Adj) %>%
    dplyr::mutate(Spectra = as.factor(Spectra)) %>%
    dplyr::mutate(Angle = 0) %>%
    dplyr::top_n(max_words, -log10(Pvalue_Adj))

  # The title reports the filters that were actually applied.
  text_title_cloud <- paste0("Metabolite Word Cloud. Biofluid: ",
                             biofluid_type, ".")
  if (!is.null(significance)) {
    text_title_cloud <- paste0(text_title_cloud,
                               " Pvalue <= ", significance, ".")
  }

  # The layout seed is handed to the geom, so the word placement is
  # reproducible when the plot is rendered and the caller's global random
  # number generator state is left untouched.
  cloud <- ggplot2::ggplot(
    plot_df,
    ggplot2::aes(
      label = Metabolite, size = -log10(Pvalue_Adj),
      colour = Retention_Time,
      angle = Angle
    )
  ) +
    ggwordcloud::geom_text_wordcloud_area(seed = 42) +
    ggplot2::scale_size_area(max_size = max_area_wordcloud) +
    ggplot2::theme_minimal() +
    ggplot2::scale_colour_continuous(type = "viridis") +
    ggplot2::ggtitle(text_title_cloud)

  # plots
  # Barplot: Spectra - Metabolite Candidate. One bar per spectrum, ordered by
  # its number of candidates.
  freq <- plot_df %>%
    dplyr::count(Spectra) %>%
    ggplot2::ggplot(ggplot2::aes(x = stats::reorder(Spectra, n),
                                 y = n, fill = stats::reorder(Spectra, n),
                                 color = stats::reorder(Spectra, n))) +
    ggplot2::geom_bar(stat = "identity", position = "stack",
                      alpha = 0.7, size = 1) +
    ggplot2::scale_x_discrete("Spectrum Number") +
    ggplot2::scale_y_continuous("Number of Metabolite Candidates") +
    ggplot2::theme_minimal() +
    ggplot2::theme(axis.ticks = ggplot2::element_blank(),
                   plot.title = ggplot2::element_text(size = 17),
                   axis.text.x = ggplot2::element_text(angle = 90),
                   axis.text = ggplot2::element_text(size = 8,
                                                     color = "black"),
                   axis.title = ggplot2::element_text(size = 14,
                                                      color = "black"),
                   axis.line = ggplot2::element_line(color = "black",
                                                     size = 1,
                                                     linetype = "solid"),
                   legend.position = "none") +
    ggplot2::ggtitle("Metabolite Candidates for Spectrum")

  list(cloud = cloud, freq = freq)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.