#' Volcano plots
#'
#' @inheritParams info_anal
#' @inheritParams prnVol
#' @inheritParams gspaMap
#' @import limma stringr purrr dplyr
#' @importFrom magrittr %>% %T>% %$% %<>%
plotVolcano <- function(df = NULL, df2 = NULL, id = "gene", adjP = FALSE,
topn_labels = 20, anal_type = "Volcano",
gspval_cutoff = 5E-2, gslogFC_cutoff = log2(1.2),
topn_gsets = Inf, show_sig = "none", fml_nms = NULL,
gset_nms = "go_sets", scale_log2r = TRUE,
complete_cases = FALSE, impute_na = FALSE,
filepath = NULL, filename = NULL, theme = NULL,
highlights = NULL, grids = NULL, ...)
{
stopifnot(vapply(c(scale_log2r, complete_cases, impute_na, adjP),
rlang::is_logical, logical(1L)))
stopifnot(vapply(c(gspval_cutoff, gslogFC_cutoff, topn_gsets),
is.numeric, logical(1L)))
stopifnot(is.numeric(topn_labels), topn_labels >= 0L)
id <- rlang::as_string(rlang::enexpr(id))
df <- df %>%
`rownames<-`(.[, id]) %>%
dplyr::select(-grep("I[0-9]{3}|log2_R[0-9]{3}|^.*\\.FC \\(.*\\)$", names(.)))
dots <- rlang::enexprs(...)
dots <- concat_fml_dots(
fmls = dots %>% .[grepl("^\\s*~", .)],
fml_nms = fml_nms,
dots = dots %>% .[!grepl("^\\s*~", .)],
)
filter_dots <- dots %>%
.[purrr::map_lgl(., is.language)] %>%
.[grepl("^filter_", names(.))]
arrange_dots <- dots %>%
.[purrr::map_lgl(., is.language)] %>%
.[grepl("^arrange_", names(.))]
select_dots <- dots %>%
.[purrr::map_lgl(., is.language)] %>%
.[grepl("^select_", names(.))]
dots <- dots %>%
.[! . %in% c(filter_dots, arrange_dots, select_dots)]
df <- df %>%
filters_in_call(!!!filter_dots) %>%
arrangers_in_call(!!!arrange_dots)
rm(list = c("filter_dots", "arrange_dots", "select_dots"))
if (adjP) {
df <- df %>%
dplyr::select(-contains("pVal")) %>%
`names<-`(gsub("adjP", "pVal", names(.)))
}
else {
df <- df %>%
dplyr::select(-contains("adjP"))
}
fmls <- dots %>% .[grepl("^\\s*~", .)]
dots <- dots %>% .[! names(.) %in% names(fmls)]
fml_nms <- names(df) %>%
.[grepl("pVal", .)] %>%
gsub("(.*)\\.pVal.*", "\\1", .) %>%
unique() %>%
.[. %in% names(fmls)]
fmls <- fmls %>% .[names(.) %in% fml_nms]
fml_nms <- fml_nms %>% .[map_dbl(., ~ which(.x == names(fmls)))]
if (!length(fml_nms))
stop("No formula (names) matched to those in \"pepSig(...)\" or \"prnSig(...)\".")
purrr::walk(file.path(filepath, fml_nms),
~ dir.create(.x, recursive = TRUE, showWarnings = FALSE))
col_ind <- fml_nms %>%
purrr::map(~ grepl(paste0("^", .x, "\\."), names(df))) %>%
`names<-`(paste0("nm_", seq_along(.))) %>%
dplyr::bind_cols() %>%
rowSums() %>%
magrittr::is_greater_than(0)
species <- df$species %>%
unique() %>%
.[!is.na(.)] %>%
as.character()
if (length(species) && !is.null(gset_nms))
load_dbs(gset_nms = gset_nms, species = species)
proteoq_volcano_theme <- theme_bw() +
theme(
axis.text.x = element_text(angle = 0, vjust = 0.5, size = 24),
axis.ticks.x = element_blank(),
axis.text.y = element_text(angle = 0, vjust = 0.5, size = 24),
axis.title.x = element_text(colour = "black", size = 24),
axis.title.y = element_text(colour="black", size = 24),
plot.title = element_text(face = "bold", colour = "black",
size = 14, hjust = .5, vjust = .5),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
strip.text.x = element_text(size = 16, colour = "black", angle = 0),
strip.text.y = element_text(size = 16, colour = "black", angle = 90),
legend.key = element_rect(colour = NA, fill = 'transparent'),
legend.background = element_rect(colour = NA, fill = "transparent"),
legend.position = "none",
legend.title = element_text(colour="black", size = 18),
legend.text = element_text(colour="black", size = 18),
legend.text.align = 0,
legend.box = NULL
)
if (is.null(theme))
theme <- proteoq_volcano_theme
if (length(fml_nms)) {
purrr::pwalk(list(fml_nms,
gspval_cutoff,
gslogFC_cutoff,
topn_gsets),
byfml_volcano,
df = df,
df2 = df2,
col_ind = col_ind,
id = !!id,
filepath = filepath,
filename = filename,
adjP = adjP,
topn_labels = topn_labels,
anal_type = anal_type,
show_sig = show_sig,
gset_nms = gset_nms,
scale_log2r = scale_log2r,
complete_cases = complete_cases,
impute_na = impute_na,
highlights = highlights,
grids = grids,
theme = theme,
!!!dots)
}
}
#' Helper of comeplet cases.
#'
#' @param df A data frame.
pval_complete_cases <- function (df)
{
rows <- df %>%
dplyr::select(grep("^pVal\\s+", names(.))) %>%
complete.cases()
if (sum(rows) == 0)
stop("None of the cases are complete.")
df[rows, ]
}
#' Formula specific volcano plots
#'
#' @inheritParams info_anal
#' @inheritParams prnVol
#' @inheritParams gspaMap
#' @inheritParams fml_gspa
#' @import purrr dplyr
#' @importFrom magrittr %>% %T>% %$% %<>%
byfml_volcano <- function (fml_nm = NULL, gspval_cutoff, gslogFC_cutoff, topn_gsets,
df, df2, col_ind, id, filepath, filename, adjP,
topn_labels, anal_type, show_sig, gset_nms,
scale_log2r, complete_cases, impute_na,
highlights = NULL, grids = NULL, theme = NULL, ...)
{
id <- rlang::as_string(rlang::enexpr(id))
df <- df %>%
dplyr::select(grep(paste0("^", fml_nm, "\\."), names(.))) %>%
`colnames<-`(gsub(paste0("^", fml_nm, "\\."), "", names(.))) %>%
dplyr::bind_cols(df[, !col_ind, drop = FALSE], .)
if (complete_cases)
df <- pval_complete_cases(df)
byfile_plotVolcano(df = df, df2 = df2, id = !!id, fml_nm = fml_nm,
filepath = filepath, filename = filename,
adjP = adjP, topn_labels = topn_labels,
anal_type = anal_type, gset_nms = gset_nms,
scale_log2r = scale_log2r,
impute_na = impute_na)(gspval_cutoff = gspval_cutoff,
gslogFC_cutoff = gslogFC_cutoff,
topn_gsets = topn_gsets,
show_sig = show_sig,
highlights = highlights,
grids = grids,
theme = theme, ...)
}
#' Plot volcanos
#'
#' @inheritParams info_anal
#' @inheritParams prnVol
#' @inheritParams gspaMap
#' @inheritParams fml_gspa
#' @import limma stringr purrr dplyr
#' @importFrom magrittr %>% %T>% %$% %<>%
byfile_plotVolcano <- function(df = NULL, df2 = NULL, id = "gene", fml_nm = NULL,
filepath = NULL, filename = NULL,
adjP = FALSE, topn_labels = 20,
anal_type = "Volcano", gset_nms = "go_sets",
scale_log2r = TRUE, impute_na = FALSE,
# highlights = NULL,
theme = NULL, ...)
{
id <- rlang::as_string(rlang::enexpr(id))
contrast_groups <- names(df[grep("^log2Ratio\\s+\\(", names(df))]) %>%
gsub("^log2Ratio\\s+\\(|\\)$", "", .)
if (anal_type %in% c("Volcano")) {
function(gspval_cutoff = 1, gslogFC_cutoff = 0, topn_gsets = 0,
show_sig = "none", theme = NULL, highlights = NULL, grids = NULL,
...) {
rm(gspval_cutoff, gslogFC_cutoff, show_sig)
fullVolcano(df = df,
id = !!id,
contrast_groups = contrast_groups,
theme = theme,
fml_nm = fml_nm,
filepath = filepath,
filename = filename,
adjP = adjP,
topn_labels = topn_labels,
highlights = highlights,
grids = grids,
...)
}
}
else if (anal_type == "mapGSPA")
function(gspval_cutoff = 5E-2, gslogFC_cutoff = log2(1.2), topn_gsets = Inf,
show_sig = "none", theme = theme, ...) {
stopifnot(!is.null(fml_nm))
filepath_fml <- file.path(filepath, fml_nm)
in_names <- list.files(path = filepath_fml, pattern = "_GSPA_[ONZ].*\\.txt$")
if (!length(in_names)) {
warning("No inputs under ", filepath_fml, call. = FALSE)
return(NULL)
}
if (is.null(df2)) {
in_names <- in_names %>%
.[!grepl("_essmap|_essmeta|_resgreedy", .)] %>%
{if (impute_na) .[grepl("_impNA", .)] else .[!grepl("_impNA", .)]}
in_names <- if (is.na(scale_log2r))
in_names[grepl("_GSPA_O", in_names)]
else if (scale_log2r)
in_names[grepl("_GSPA_Z", in_names)]
else
in_names[grepl("_GSPA_N", in_names)]
if (!length(in_names))
stop("No inputs correspond to impute_na = ", impute_na,
", scale_log2r = ", scale_log2r,
" at fml_nms = ", fml_nm)
df2 <- in_names
}
else {
local({
if (grepl("_essmap|_essmeta|_resgreedy", df2))
stop("Do not use `_essmap`, `_essmeta` or `_resgreedy` for `df2`.")
non_exists <- df2 %>% .[! . %in% in_names]
if (length(non_exists))
stop("Missing file(s): ", paste(non_exists, collapse = ", "))
})
if (!length(df2))
stop("File(s) not found under \"", filepath_fml, "\".")
}
# plot data ---------------------------
purrr::walk(df2, gsVolcano,
df = df,
contrast_groups = contrast_groups,
gsea_key = "term",
gsets = gsets,
theme = theme,
fml_nm = fml_nm,
filepath = filepath,
filename = filename,
adjP = adjP,
topn_labels = topn_labels,
show_sig = show_sig,
gspval_cutoff = gspval_cutoff,
gslogFC_cutoff = gslogFC_cutoff,
topn_gsets = topn_gsets,
...)
}
}
#' Sets data for volcan plots.
#'
#' @param x A contrast
#' @param df A data frame
#' @param keys Column keys.
subset_volcano_df <- function (x, df, keys = c("pVal", "adjP", "log2Ratio"))
{
nms <- names(df)
pat <- paste0(" (", x, ")")
cols <- lapply(keys, function (key)
grepl(paste0(key, pat), nms, fixed = TRUE) & grepl(paste0("^", key), nms)
)
dfa <- df[, purrr::reduce(cols, `|`)] %>%
`colnames<-`(gsub("\\s+\\(.*\\)$", "", names(.))) %>%
dplyr::mutate(Contrast = x)
pat_i <- lapply(keys, function (key) paste0("^", key, " "))
dfb <- df[, !grepl(paste(pat_i, collapse = "|"), nms), drop = FALSE]
dplyr::bind_cols(dfb, dfa)
}
#' Volcano plots for all proteins or peptides in a data set
#'
#' @param contrast_groups The contrast groups defined under a formula at \code{fml_nm}.
#' @inheritParams info_anal
#' @inheritParams prnVol
#' @inheritParams gspaMap
#' @inheritParams fml_gspa
#' @import dplyr purrr ggplot2
#' @importFrom magrittr %>% %T>% %$% %<>%
#' @importFrom limma vennDiagram
fullVolcano <- function(df = NULL, id = "gene", contrast_groups = NULL, theme = NULL,
fml_nm = NULL, filepath = NULL, filename = NULL, adjP = FALSE,
topn_labels = 20, highlights = NULL, grids = NULL, ...)
{
id <- rlang::as_string(rlang::enexpr(id))
dots <- rlang::enexprs(...)
xco <- if (is.null(dots[["xco"]])) 1.2 else dots[["xco"]]
yco <- if (is.null(dots[["yco"]])) .05 else dots[["yco"]]
title <- dots[["title"]]
x_label <- if (is.null(dots[["x_label"]]))
expression("Ratio ("*log[2]*")")
else
dots[["x_label"]]
y_label <- if (is.null(dots[["y_label"]])) {
if (adjP)
expression("adjP ("*-log[10]*")")
else
expression("pVal ("*-log[10]*")")
}
else {
dots[["y_label"]]
}
if (!length(contrast_groups))
stop("No constrasts available.")
if (!is.null(grids))
contrast_groups <- contrast_groups[grids]
dfw <- lapply(contrast_groups, subset_volcano_df, df = df,
keys = c("pVal", "adjP", "log2Ratio"))
local({
if (length(dfw) > 1L) {
ncols <- unlist(lapply(dfw, ncol))
if (length(unique(ncols)) > 1L)
stop("Uneven number of columns detected. ", "Please report the bug.")
nms <- lapply(dfw, names)
nms_1 <- nms[[1]]
nms_all <- unique(unlist(nms))
if (length(nms_1) != length(nms_all))
stop("Uneven column names detected. ", "Please report the bug.")
}
})
dfw <- dfw %>%
do.call(rbind, .) %>%
dplyr::mutate(
Contrast = factor(Contrast, levels = contrast_groups),
valence = ifelse(.$log2Ratio > 0, "pos", "neg")
) %>%
dplyr::filter(!is.na(pVal))
dfw_sub <- dfw %>%
dplyr::filter((pVal < yco) & (abs(log2Ratio) > log2(xco))) %>%
dplyr::arrange(Contrast, pVal) %>%
dplyr::group_by(Contrast) %>%
dplyr::mutate(Index = row_number()) %>%
data.frame (check.names = FALSE)
if(is.null(highlights))
dfw_high <- dfw_sub[0, ]
else {
if (!is.list(highlights))
highlights <- list(highlights)
if (length(highlights) > 1L)
stop("Single expression for `highlights`.")
rows <- eval(highlights[[1]], dfw_sub)
dfw_high <- dfw_sub[rows, ]
}
if (nrow(dfw_high)) {
warning("Overruled `topn_labels` by `highlights`.")
dfw_sub_top20 <- dfw_high
}
else {
dfw_sub_top20 <- dfw_sub %>%
dplyr::group_by(Contrast) %>%
dplyr::top_n(n = -topn_labels, wt = pVal) %>%
data.frame (check.names = FALSE)
}
# data table for labels
if (FALSE) {
dt <- purrr::map(contrast_groups, ~ {
to_csv_(dfw_sub_top20 %>%
dplyr::filter(Contrast == .x) %>%
dplyr::select(c("Index", id))) %>%
{if(!grepl("\n", .)) . <- paste0(.,"\n1,\"NA\"") else .} # zero-entry exception
}) %>%
do.call(rbind, .) %>%
data.frame(Contrast = contrast_groups, id = ., stringsAsFactors = FALSE) %>%
dplyr::rename(!!rlang::sym(id) := id) %>%
dplyr::mutate(Contrast = factor(Contrast, levels = contrast_groups))
}
dt <- dfw_sub_top20 %>%
split(.$Contrast) %>%
lapply(`[`, c("Index", id)) %>%
lapply(to_csv_) %>%
lapply(function (x) {
if(!grepl("\n", x)) x <- paste0(x,"\n1,\"NA\"") else x
}) %>%
do.call(rbind, .) %>%
data.frame(Contrast = contrast_groups, id = ., stringsAsFactors = FALSE) %>%
dplyr::rename(!!rlang::sym(id) := id) %>%
dplyr::mutate(Contrast = factor(Contrast, levels = contrast_groups))
fn_prefix <- gsub("\\.[^.]*$", "", filename)
myPalette <- c("#377EB8", "#E41A1C")
if (nrow(dfw_high)) {
dfw_greater <- dfw_high[dfw_high$valence == "pos", ]
dfw_less <- dfw_high[dfw_high$valence == "neg", ]
}
else {
dfw_greater <- dfw_sub[dfw_sub$valence == "pos", ]
dfw_less <- dfw_sub[dfw_sub$valence == "neg", ]
}
nrow <- if (is.null(dots$nrow))
if (length(unique(dfw$Contrast)) > 3L) 2L else 1L
else
dots$nrow
if (is.null(dots$xmax)) {
xmax <- ceiling(pmax(abs(min(dfw$log2Ratio, na.rm = TRUE)),
max(dfw$log2Ratio, na.rm = TRUE)))
}
else {
xmax <- eval(dots$xmax)
stopifnot(xmax > 0)
}
if (is.null(dots$xmin)) {
xmin <- -xmax
}
else {
xmin <- eval(dots$xmin)
stopifnot(xmin < 0)
}
if (is.null(dots$ymax)) {
ymax <- ceiling(max(-log10(dfw$pVal), na.rm = TRUE)) * 1.1
}
else {
ymax <- eval(dots$ymax)
stopifnot(ymax > 0)
}
if (is.null(dots$ymin)) {
ymin <- 0
}
else {
ymin <- eval(dots$ymin)
stopifnot(ymin < ymax)
}
p <- ggplot() +
geom_point(data = dfw, mapping = aes(x = log2Ratio, y = -log10(pVal)),
size = 3, colour = "#252525", shape = 20, alpha = .5) +
geom_point(data = dfw_greater, mapping = aes(x = log2Ratio, y = -log10(pVal)),
size = 3, color = myPalette[2], shape = 20, alpha = .8) +
geom_point(data = dfw_less, mapping = aes(x = log2Ratio, y = -log10(pVal)),
size = 3, color = myPalette[1], shape = 20, alpha = .8) +
geom_hline(yintercept = -log10(yco), linetype = "longdash", size = .5) +
geom_vline(xintercept = -log2(xco), linetype = "longdash", size = .5) +
geom_vline(xintercept = log2(xco), linetype = "longdash", size = .5) +
scale_x_continuous(limits = c(xmin, xmax)) +
scale_y_continuous(limits = c(ymin, ymax)) +
labs(title = title, x = x_label, y = y_label) +
theme
if (nrow(dfw_sub_top20)) {
p <- p + geom_text(data = dfw_sub_top20,
mapping = aes(x = log2Ratio,
y = -log10(pVal),
label = Index,
color = Index),
size = 3,
alpha = .5,
hjust = 0,
nudge_x = 0.05,
vjust = 0,
nudge_y = 0.05,
na.rm = TRUE)
p <- p + facet_wrap(~ Contrast, nrow = nrow, labeller = label_value)
p <- p + geom_table(data = dt, aes(table = !!rlang::sym(id)),
x = -xmax*.85, y = ymax/2)
}
else {
p <- p + facet_wrap(~ Contrast, nrow = nrow, labeller = label_value)
}
if(is.null(dots$width)) {
width <- if (nrow > 1L)
6*length(unique(dfw$Contrast))/nrow + 1
else
6*length(unique(dfw$Contrast))/nrow
}
else {
width <- eval(dots$width)
}
height <- if(is.null(dots$height))
6*nrow
else
eval(dots$height)
ggsave_dots <- set_ggsave_dots(dots, c("filename", "plot", "width", "height"))
rlang::eval_tidy(rlang::quo(ggsave(filename = file.path(filepath, fml_nm, filename),
plot = p,
width = width,
height = height,
!!!ggsave_dots)))
summ_venn(df = dfw_greater, id = id, contrast_groups = contrast_groups,
filepath = filepath, direction = paste0(fn_prefix, "_greater"),
fml_nm = fml_nm)
summ_venn(df = dfw_less, id = id, contrast_groups = contrast_groups,
filepath = filepath, direction = paste0(fn_prefix, "_less"),
fml_nm = fml_nm)
saveRDS(
list(
data = dfw,
greater = dfw_greater,
less = dfw_less,
topns = dfw_sub_top20,
highlights = dfw_high,
topn_labels = dt,
palette = myPalette,
xco = xco,
yco = yco,
xmin = xmin,
xmax = xmax,
ymin = ymin,
ymax = ymax,
title = title,
x_label = x_label,
y_label = y_label,
theme = theme
),
file.path(filepath, fml_nm, paste0(fn_prefix, ".rds"))
)
}
#' Venn counts.
#'
#' @param df A data frame.
#' @param id ID.
#' @param contrast_groups Contrast groups
#' @param max_size The maximum number of contrast groups.
#' @inheritParams plot_venn
summ_venn <- function(df, id, contrast_groups, filepath, direction, fml_nm,
max_size = 20L)
{
len <- length(contrast_groups)
if (len > max_size) {
message("The number of contrasts is more than ", max_size, ".")
return(NULL)
}
universe <- sort(unique(df[[id]]))
Counts <- matrix(0, nrow = length(universe), ncol = len) %>%
`rownames<-`(universe) %>%
`colnames<-`(contrast_groups)
others <- vector("list", len)
names(others) <- contrast_groups
for (Group in contrast_groups) {
df2 <- dplyr::filter(df, Contrast == Group)
ids <- df2[[id]]
Counts[, Group] <- rownames(Counts) %in% ids
nm_p <- paste0(fml_nm, ".", "pVal (", Group, ")")
nm_r <- paste0(fml_nm, ".", "log2Ratio (", Group, ")")
others[[Group]] <- df2[, c(id, "pVal", "log2Ratio")] %>%
dplyr::rename(!!nm_p := pVal, !!nm_r := log2Ratio)
}
plot_venn(Counts, filepath, direction, fml_nm)
ans <- local({
a <- Counts %>%
data.frame(check.names = FALSE) %>%
tibble::rownames_to_column(id)
b <- others %>%
purrr::reduce(dplyr::full_join, by = id)
dplyr::left_join(a, b, by = id) %T>%
readr::write_tsv(file.path(filepath, fml_nm, paste0(direction, "_venn.txt")))
})
invisible(ans)
}
#' Plots Venn.
#'
#' @param Counts Counts from \link{summ_venn}.
#' @param filepath A file path.
#' @param direction A direction of up or down.
#' @param fml_nm Formula name.
plot_venn <- function(Counts, filepath, direction, fml_nm)
{
if (is.null(Counts))
return(NULL)
myPalette <- brewer.pal(n = 9, name = "Set1")
Width <- 3
Height <- 3
fn_prefix <- paste0(direction, "_venn")
if (FALSE) {
write.table(Counts, file.path(filepath, fml_nm, paste0(fn_prefix, ".txt")),
sep = "\t", col.names = TRUE, row.names = TRUE, quote = FALSE)
}
if (ncol(Counts) > 5L) {
warning("No Venn diagrams with more than 5 groups.")
return(NULL)
}
png(file.path(filepath, fml_nm, paste0(fn_prefix, ".png")),
width = Width, height = Height, units = "in", res = 300)
limma::vennDiagram(limma::vennCounts(Counts), circle.col = myPalette, cex = .5)
dev.off()
}
#' Volcano plots of protein \code{log2FC} under given gene sets
#'
#' @param gsea_key Character string; the column key indicating the terms of gene sets.
#' @param gsets The gene sets.
#' @inheritParams info_anal
#' @inheritParams prnVol
#' @inheritParams gspaMap
#' @inheritParams fml_gspa
#' @inheritParams fullVolcano
#' @import dplyr ggplot2
#' @importFrom magrittr %>% %T>% %$% %<>%
gsVolcano <- function(df2 = NULL, df = NULL, contrast_groups = NULL,
gsea_key = "term", gsets = NULL,
theme = NULL, fml_nm = NULL,
filepath = NULL, filename = NULL, adjP = FALSE,
topn_labels = 20, show_sig = "none",
gspval_cutoff = 1E-6, gslogFC_cutoff = log2(1.2),
topn_gsets = Inf, ...)
{
dat_dir <- get_gl_dat_dir()
par_filepath <- file.path(dat_dir, "Calls",
gsub("\\.txt$", "@", df2) %>% paste0(fml_nm, ".rda"))
if (file.exists(par_filepath)) load(par_filepath) else call_pars <- NULL
df <- local({
par_filter_dots <- call_pars %>%
.[purrr::map_lgl(., is.language)] %>%
.[grepl("^filter_", names(.))]
if (!purrr::is_empty(par_filter_dots)) {
message(
"\nApply the following vararg(s) from matching `prnGSPA` to: ", fml_nm, ".",
paste(par_filter_dots, "\n"))
df %>% filters_in_call(!!!par_filter_dots)
}
else {
message("\nNo `filter_` varargs with formula `", fml_nm, "`.")
invisible(df)
}
})
local({
use_adjP <- call_pars$use_adjP
if (use_adjP != adjP) {
warning("\n",
"Current analysis: `adjP = ", adjP, "`.\n",
"Data `", df2, "@", fml_nm,
"`: based on `prnGSPA(use_adjP = ", use_adjP, ")`\n",
"May consider `gspaMap(adjP = ", use_adjP, ")` for ",
df2, "@", fml_nm, ".\n",
" See also `?gspaMap` for analyses against a specific ",
"file and and formula.\n\n",
call. = FALSE)
}
})
pval_cutoff <- local({
par_pval_cutoff <- call_pars$pval_cutoff
if (length(par_pval_cutoff)) par_pval_cutoff else 0.05
})
logFC_cutoff <- local({
par_logFC_cutoff <- call_pars$logFC_cutoff
if (length(par_logFC_cutoff)) par_logFC_cutoff else log2(1.2)
})
custom_prefix <- gsub("(.*_{0,1})Protein_GSPA.*", "\\1", df2)
dir.create(path = file.path(filepath, fml_nm, custom_prefix),
recursive = TRUE, showWarnings = FALSE)
fn_suffix <- gsub("^.*\\.([^.]*)$", "\\1", filename)
fn_prefix <- gsub("\\.[^.]*$", "", filename)
filename <- paste0(custom_prefix, fn_prefix, ".", fn_suffix)
dots <- rlang::enexprs(...)
xco <- ifelse(is.null(dots$xco), 1.2, dots$xco)
yco <- ifelse(is.null(dots$yco), .05, dots$yco)
x_label <- if (is.null(dots$x_label))
expression("Ratio ("*log[2]*")")
else
dots$x_label
y_label <- if (is.null(dots$y_label)) {
if (adjP)
expression("adjP ("*-log[10]*")")
else
expression("pVal ("*-log[10]*")")
}
else {
dots$y_label
}
gsea_res <- tryCatch(
readr::read_tsv(file.path(filepath, fml_nm, df2),
col_types = cols(term = col_character(),
is_essential = col_logical(),
size = col_double(),
ess_size = col_double(),
contrast = col_character(),
p_val = col_double(),
q_val = col_double(),
log2fc = col_double())),
error = function(e) NA
)
message("Secondary file loaded: ", file.path(filepath, fml_nm, df2))
filter2_dots <- dots %>%
.[purrr::map_lgl(., is.language)] %>%
.[grepl("^filter2_", names(.))]
arrange2_dots <- dots %>%
.[purrr::map_lgl(., is.language)] %>%
.[grepl("^arrange2_", names(.))]
select2_dots <- dots %>%
.[purrr::map_lgl(., is.language)] %>%
.[grepl("^select2_", names(.))]
dots <- dots %>%
.[! . %in% c(filter2_dots, arrange2_dots, select2_dots)]
gsea_res <- gsea_res %>%
dplyr::arrange(p_val, abs(log2fc)) %>%
filters_in_call(!!!filter2_dots) %>%
arrangers_in_call(!!!arrange2_dots)
rm(list = c("filter2_dots", "arrange2_dots", "select2_dots"))
if (!nrow(gsea_res))
stop("No GSPA terms available after data filtration.")
topn_gsets <- pmin(dplyr::n_distinct(gsea_res[[gsea_key]]), topn_gsets)
terms <- gsea_res %>%
dplyr::arrange(p_val) %>%
dplyr::slice(1:(topn_gsets*length(contrast_groups))) %>%
dplyr::filter(p_val <= gspval_cutoff,
abs(log2fc) >= gslogFC_cutoff) %>%
dplyr::select(gsea_key) %>%
unique() %>%
unlist() %>%
.[. %in% names(gsets)]
gsea_res <- gsea_res %>%
dplyr::mutate(p_val = format(p_val, scientific = TRUE, digits = 2)) %>%
dplyr::mutate(q_val = format(p_val, scientific = TRUE, digits = 2)) %>%
dplyr::mutate(log2fc = round(log2fc, digits = 2))
if (length(terms)) {
dfw <- do.call(rbind,
purrr::map(contrast_groups, ~ {
df[, grepl(paste0(" (", .x, ")"), names(df), fixed = TRUE)] %>%
`colnames<-`(gsub("\\s+\\(.*\\)$", "", names(.))) %>%
dplyr::mutate(Contrast = .x) %>%
dplyr::bind_cols(df[, !grepl("^pVal\\s+|^adjP\\s+|^log2Ratio\\s+", names(df))], .)
} )) %>%
dplyr::mutate(Contrast = factor(Contrast, levels = contrast_groups),
pVal = as.numeric(pVal),
valence = ifelse(.$log2Ratio > 0, "pos", "neg")) %>%
dplyr::filter(!is.na(pVal))
lapply(terms, function(gt) {
# some results may be based on gene sets from an older database,
# which become missing terms in the current
gsets_sub <- gsets %>% .[names(.) == gt]
if (!length(gsets_sub))
return(NULL)
fn <- gsub(":", "~", gsub("/", "or", names(gsets_sub)[[1]]), fixed = TRUE)
res_sub <- gsea_res[as.character(gsea_res$term) == gt, ] %>%
data.frame(check.names = FALSE)
dfw_sub <- dfw[as.character(dfw$entrez) %in% gsets_sub[[1]], ]
if (!nrow(dfw_sub))
return(NULL)
if (is.null(dots$xmax)) {
xmax <- ceiling(pmax(abs(min(dfw_sub$log2Ratio, na.rm = TRUE)),
max(dfw_sub$log2Ratio, na.rm = TRUE)))
}
else {
xmax <- eval(dots$xmax)
stopifnot(xmax > 0)
}
if (is.null(dots$xmin)) {
xmin <- -xmax
}
else {
xmin <- eval(dots$xmin)
stopifnot(xmin < 0)
}
if (is.null(dots$ymax)) {
ymax <- ceiling(max(-log10(dfw_sub$pVal))) * 1.1
}
else {
ymax <- eval(dots$ymax)
stopifnot(ymax > 0)
}
if (is.null(dots$ymin)) {
ymin <- 0
}
else {
ymin <- eval(dots$ymin)
stopifnot(ymin < ymax)
}
# ensure the same levels between "Levels" and "newLevels"
Levels <- levels(dfw_sub$Contrast)
dfw_sub <- dfw_sub %>%
dplyr::arrange(Contrast, pVal) %>%
dplyr::group_by(Contrast) %>%
dplyr::mutate(Index = row_number()) %>%
data.frame(check.names = FALSE) %>%
dplyr::mutate(Contrast = as.character(Contrast)) %>%
dplyr::left_join(., res_sub, by = c("Contrast" = "contrast")) %>%
dplyr::mutate(Contrast = factor(Contrast, levels = Levels)) %>%
dplyr::arrange(Contrast) %>%
# dplyr::mutate(p_val = format(p_val, scientific = TRUE, digits = 2)) %>%
# dplyr::mutate(p_val = as.numeric(p_val)) %>%
# dplyr::mutate(q_val = format(q_val, scientific = TRUE, digits = 2)) %>%
# dplyr::mutate(q_val = as.numeric(q_val)) %>%
# dplyr::mutate(sig_level = ifelse(.$q.val > 0.05, "n.s.", ifelse(.$q.val > 0.005, "*", "**"))) %>%
# dplyr::mutate(newContrast = paste0(Contrast, " (", sig_level, ")"))
dplyr::mutate(newContrast = Contrast)
if (show_sig != "none") {
if (grepl("^p", show_sig)) {
dfw_sub <- dfw_sub %>%
dplyr::mutate(newContrast = paste0(Contrast, " (p = ", p_val, ")"))
}
else if (grepl("^q", show_sig)) {
dfw_sub <- dfw_sub %>%
dplyr::mutate(newContrast = paste0(Contrast, " (q = ", q_val, ")"))
}
}
newLevels <- unique(dfw_sub$newContrast)
dfw_sub <- dfw_sub %>%
dplyr::mutate(newContrast = factor(newContrast, levels = newLevels)) %>%
dplyr::arrange(newContrast)
dfw_sub_top20 <- dfw_sub %>%
dplyr::group_by(newContrast) %>%
dplyr::top_n(n = -topn_labels, wt = pVal)
dt <- purrr::map(newLevels, ~ {
dfw_sub_top20 %>%
dplyr::filter(newContrast == .x) %>%
data.frame(check.names = FALSE) %>%
dplyr::select(c("Index", "gene")) %>%
to_csv_() %>%
{if(!grepl("\n", .)) . <- paste0(.,"\n1,\"NA\"") else .}
}) %>%
do.call(rbind, .) %>%
data.frame(newContrast = newLevels,
Contrast = Levels,
Gene = .,
stringsAsFactors = FALSE) %>%
dplyr::mutate(Contrast = factor(Contrast, levels = Levels)) %>%
dplyr::mutate(newContrast = factor(newContrast, levels = newLevels))
dfw_greater <- dfw_sub %>% dplyr::filter(pVal < yco & log2Ratio > log2(xco))
dfw_less <- dfw_sub %>% dplyr::filter(pVal < yco & log2Ratio < -log2(xco))
dt_pos <- ifelse(nrow(dfw_greater) > nrow(dfw_less), -xmax*.85, xmax*.6)
myPalette <- c("#377EB8", "#E41A1C")
nrow <- if(is.null(dots$nrow))
if (length(unique(dfw_sub$Contrast)) > 3L) 2L else 1L
else
dots$nrow
p <- ggplot() +
geom_point(data = dfw_sub, mapping = aes(x = log2Ratio, y = -log10(pVal)),
size = 3, colour = "gray", shape = 20, alpha = .5) +
geom_point(data = dfw_greater, mapping = aes(x = log2Ratio, y = -log10(pVal)),
size = 3, color = myPalette[2], shape = 20, alpha = .8) +
geom_point(data = dfw_less, mapping = aes(x = log2Ratio, y = -log10(pVal)),
size = 3, color = myPalette[1], shape = 20, alpha = .8) +
geom_hline(yintercept = -log10(yco), linetype = "longdash", size = .5) +
geom_vline(xintercept = -log2(xco), linetype = "longdash", size = .5) +
geom_vline(xintercept = log2(xco), linetype = "longdash", size =.5) +
geom_hline(yintercept = -log10(pval_cutoff),
linetype = "longdash", size = .5, color = "#fc9272") +
geom_vline(xintercept = -logFC_cutoff,
linetype = "longdash", size = .5, color = "#fc9272") +
geom_vline(xintercept = logFC_cutoff,
linetype = "longdash", size =.5, color = "#fc9272") +
labs(title = names(gsets_sub), x = x_label, y = y_label) +
scale_x_continuous(limits = c(xmin, xmax)) +
scale_y_continuous(limits = c(ymin, ymax)) +
theme
p <- p + facet_wrap(~ newContrast, nrow = nrow, labeller = label_value)
if (nrow(dfw_sub_top20)) {
p <- p + geom_text(data = dfw_sub_top20,
mapping = aes(x = log2Ratio, y = -log10(pVal),
label = Index, color = Index),
size = 2, hjust = 0, nudge_x = 0.05, vjust = 0, nudge_y = 0.05) +
geom_table(data = dt, aes(table = Gene), x = dt_pos, y = ymax/2)
}
if (nchar(fn) > 50)
fn <- paste0(str_sub(fn, 1, 50), "...")
width <- if (is.null(dots$width)) {
if (nrow > 1L)
6*length(unique(dfw_sub$newContrast))/nrow + 1
else
6*length(unique(dfw$Contrast))/nrow
}
else {
dots$width
}
height <- if (is.null(dots$height)) 6*nrow else dots$height
ggsave_dots <- set_ggsave_dots(dots, c("filename", "plot", "width", "height"))
rlang::eval_tidy(
rlang::quo(
ggsave(filename = file.path(filepath, fml_nm, custom_prefix,
paste0(fn, ".", fn_suffix)),
plot = p,
width = width,
height = height,
!!!ggsave_dots)
)
)
write.table(dfw_sub,
file = file.path(filepath, fml_nm, custom_prefix, paste0(fn, ".txt")),
sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE)
})
}
}
#' Volcano plots
#'
#' \code{pepVol} visualizes the volcano plots of peptide data.
#'
#' @rdname prnVol
#'
#' @import purrr
#' @export
pepVol <- function (scale_log2r = TRUE, complete_cases = FALSE, impute_na = FALSE,
adjP = FALSE, topn_labels = 20,
df = NULL, filepath = NULL, filename = NULL,
fml_nms = NULL, theme = NULL, highlights = NULL,
grids = NULL, ...)
{
check_dots(c("id", "anal_type", "df2"), ...)
check_depreciated_args(list(c("show_labels", "topn_labels")), ...)
id <- match_call_arg(normPSM, group_psm_by)
stopifnot(rlang::as_string(id) %in% c("pep_seq", "pep_seq_mod"),
length(id) == 1L)
scale_log2r <- match_pepSig_scale_log2r(scale_log2r = scale_log2r,
impute_na = impute_na)
df <- rlang::enexpr(df)
filepath <- rlang::enexpr(filepath)
filename <- rlang::enexpr(filename)
reload_expts()
info_anal(df = !!df,
df2 = NULL,
id = !!id,
filepath = !!filepath,
filename = !!filename,
scale_log2r = scale_log2r,
complete_cases = complete_cases,
impute_na = impute_na,
anal_type = "Volcano")(fml_nms = fml_nms,
adjP = adjP,
topn_labels = topn_labels,
theme = theme,
highlights = highlights,
grids = grids,
...)
}
#'Volcano plots
#'
#'\code{prnVol} visualizes the volcano plots of protein data.
#'
#'@inheritParams prnGSPA
#'@inheritParams prnHist
#'@param adjP Logical; if TRUE, use Benjamini-Hochberg pVals in volcano plots.
#' The default is FALSE.
#'@param topn_labels A non-negative integer; the top-n species for labeling in a
#' plot. At \code{topn_labels = 0}, no labels of proteins/peptides will be
#' shown. The default is to label the top-20 species with the lowest p-values.
#'@param highlights A list of entries for highlighting. See also \code{filter_}
#' varargs for the format.
#'@param grids An integer or integer vector for subset visualization of
#' contrasts within a group. For example with a group of three contrasts,
#' \code{grids = 2:3} will hide the first grid from displaying. At the
#' NULL default, all available grids will be shown.
#'@param ... \code{filter_}: Variable argument statements for the row filtration
#' against data in a primary file linked to \code{df}. See also
#' \code{\link{normPSM}} for the format of \code{filter_} statements. \cr \cr
#' Additional parameters for plotting: \cr \code{xco}, the cut-off lines of
#' fold changes at position \code{x}; the default is at \eqn{-1.2} and
#' \eqn{+1.2}. \cr \code{yco}, the cut-off line of \code{pVal} at position
#' \code{y}; the default is \eqn{0.05}. \cr \code{width}, the width of plot;
#' \cr \code{height}, the height of plot. \cr \code{nrow}, the number of rows
#' in a plot. \cr \code{xmin}, the minimum \code{x}. \cr \code{xmax}, the
#' maximum \code{x}. \cr \code{ymin}, the minimum \code{y}. \cr \code{ymax},
#' the maximum \code{y}. \cr \code{x_label}, the label on \code{x}. \cr
#' \code{y_label}, the label on \code{y}.
#'
#'@import dplyr ggplot2
#'@importFrom magrittr %>% %T>% %$% %<>%
#'
#'@example inst/extdata/examples/prnVol_.R
#'
#'@seealso \emph{Metadata} \cr \code{\link{load_expts}} for metadata preparation
#' and a reduced working example in data normalization \cr
#'
#' \emph{Data normalization} \cr \code{\link{normPSM}} for extended examples in
#' PSM data normalization \cr \code{\link{PSM2Pep}} for extended examples in
#' PSM to peptide summarization \cr \code{\link{mergePep}} for extended
#' examples in peptide data merging \cr \code{\link{standPep}} for extended
#' examples in peptide data normalization \cr \code{\link{Pep2Prn}} for
#' extended examples in peptide to protein summarization \cr
#' \code{\link{standPrn}} for extended examples in protein data normalization.
#' \cr \code{\link{purgePSM}} and \code{\link{purgePep}} for extended examples
#' in data purging \cr \code{\link{pepHist}} and \code{\link{prnHist}} for
#' extended examples in histogram visualization. \cr \code{\link{extract_raws}}
#' and \code{\link{extract_psm_raws}} for extracting MS file names \cr
#'
#' \emph{Variable arguments of `filter_...`} \cr \code{\link{contain_str}},
#' \code{\link{contain_chars_in}}, \code{\link{not_contain_str}},
#' \code{\link{not_contain_chars_in}}, \code{\link{start_with_str}},
#' \code{\link{end_with_str}}, \code{\link{start_with_chars_in}} and
#' \code{\link{ends_with_chars_in}} for data subsetting by character strings
#' \cr
#'
#' \emph{Missing values} \cr \code{\link{pepImp}} and \code{\link{prnImp}} for
#' missing value imputation \cr
#'
#' \emph{Informatics} \cr \code{\link{pepSig}} and \code{\link{prnSig}} for
#' significance tests \cr \code{\link{pepVol}} and \code{\link{prnVol}} for
#' volcano plot visualization \cr \code{\link{prnGSPA}} for gene set enrichment
#' analysis by protein significance pVals \cr \code{\link{gspaMap}} for mapping
#' GSPA to volcano plot visualization \cr \code{\link{prnGSPAHM}} for heat map
#' and network visualization of GSPA results \cr \code{\link{prnGSVA}} for gene
#' set variance analysis \cr \code{\link{prnGSEA}} for data preparation for
#' online GSEA. \cr \code{\link{pepMDS}} and \code{\link{prnMDS}} for MDS
#' visualization \cr \code{\link{pepPCA}} and \code{\link{prnPCA}} for PCA
#' visualization \cr \code{\link{pepLDA}} and \code{\link{prnLDA}} for LDA
#' visualization \cr \code{\link{pepHM}} and \code{\link{prnHM}} for heat map
#' visualization \cr \code{\link{pepCorr_logFC}}, \code{\link{prnCorr_logFC}},
#' \code{\link{pepCorr_logInt}} and \code{\link{prnCorr_logInt}} for
#' correlation plots \cr \code{\link{anal_prnTrend}} and
#' \code{\link{plot_prnTrend}} for trend analysis and visualization \cr
#' \code{\link{anal_pepNMF}}, \code{\link{anal_prnNMF}},
#' \code{\link{plot_pepNMFCon}}, \code{\link{plot_prnNMFCon}},
#' \code{\link{plot_pepNMFCoef}}, \code{\link{plot_prnNMFCoef}} and
#' \code{\link{plot_metaNMF}} for NMF analysis and visualization \cr
#'
#' \emph{Custom databases} \cr \code{\link{Uni2Entrez}} for lookups between
#' UniProt accessions and Entrez IDs \cr \code{\link{Ref2Entrez}} for lookups
#' among RefSeq accessions, gene names and Entrez IDs \cr \code{\link{prepGO}}
#' for
#' \code{\href{http://current.geneontology.org/products/pages/downloads.html}{gene
#' ontology}} \cr \code{\link{prepMSig}} for
#' \href{https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.0/}{molecular
#' signatures} \cr \code{\link{prepString}} and \code{\link{anal_prnString}}
#' for STRING-DB \cr
#'
#' \emph{Column keys in PSM, peptide and protein outputs} \cr
#' system.file("extdata", "psm_keys.txt", package = "proteoQ") \cr
#' system.file("extdata", "peptide_keys.txt", package = "proteoQ") \cr
#' system.file("extdata", "protein_keys.txt", package = "proteoQ") \cr
#'
#'@export
prnVol <- function (scale_log2r = TRUE, complete_cases = FALSE, impute_na = FALSE,
adjP = FALSE, topn_labels = 20,
df = NULL, filepath = NULL, filename = NULL,
fml_nms = NULL, theme = NULL, highlights = NULL,
grids = NULL, ...)
{
check_dots(c("id", "anal_type", "df2"), ...)
check_depreciated_args(list(c("show_labels", "topn_labels")), ...)
id <- match_call_arg(normPSM, group_pep_by)
stopifnot(rlang::as_string(id) %in% c("prot_acc", "gene"),
length(id) == 1L)
scale_log2r <- match_prnSig_scale_log2r(scale_log2r = scale_log2r,
impute_na = impute_na)
df <- rlang::enexpr(df)
filepath <- rlang::enexpr(filepath)
filename <- rlang::enexpr(filename)
reload_expts()
info_anal(df = !!df,
df2 = NULL,
id = !!id,
filepath = !!filepath,
filename = !!filename,
scale_log2r = scale_log2r,
complete_cases = complete_cases,
impute_na = impute_na,
anal_type = "Volcano")(fml_nms = fml_nms,
adjP = adjP,
topn_labels = topn_labels,
theme = theme,
highlights = highlights,
grids = grids,
...)
}
#'Volcano plots of protein \code{log2FC} under gene sets
#'
#'\code{gspaMap} visualizes the volcano plots of protein subgroups under the
#'same gene sets.
#'
#'@inheritParams prnGSPA
#'@inheritParams prnVol
#'@inheritParams prnHist
#'@inheritParams plot_prnTrend
#'@inheritParams anal_prnTrend
#'@param filename Use system default for each gene set.
#'@param show_sig Character string indicating the type of significance values to
#' be shown with \code{\link{gspaMap}}. The default is \code{"none"}.
#' Additional choices are from \code{c("pVal", "qVal")} where \code{pVal} or
#' \code{qVal} will be shown, respectively, in the facet grid of the plots.
#'@param gspval_cutoff Numeric value or vector for uses with
#' \code{\link{gspaMap}}. \code{Gene sets} with enrichment \code{pVals} less
#' significant than the threshold will be excluded from volcano plot
#' visualization. The default significance is 0.05 for all formulas matched to
#' or specified in argument \code{fml_nms}. Formula-specific threshold is
#' allowed by supplying a vector of cut-off values.
#'@param gslogFC_cutoff Numeric value or vector for uses with
#' \code{\link{gspaMap}}. \code{Gene sets} with absolute enrichment
#' \code{log2FC} less than the threshold will be excluded from volcano plot
#' visualization. The default magnitude is \code{log2(1.2) } for all formulas
#' matched to or specified in argument \code{fml_nms}. Formula-specific
#' threshold is allowed by supplying a vector of absolute values in
#' \code{log2FC}.
#'@param topn_gsets Numeric value or vector; top entries in gene sets ordered by
#' increasing \code{pVal} for visualization. The default is to use all
#' available entries.
#'
#' Note that it is users' responsibility to ensure that the custom gene sets
#' contain terms that can be found from the one or multiple preceding analyses
#' of \code{\link{prnGSPA}}. For simplicity, it is generally applicable to
#' include \emph{all} of the data bases that have been applied to
#' \code{\link{prnGSPA}} and in that way no terms will be missed for
#' visualization. See also \code{\link{prnGSPA}} for examples of custom data
#' bases.
#'@param ... \code{filter_}: Variable argument statements for the row filtration
#' against data in a primary file linked to \code{df}. See also
#' \code{\link{normPSM}} for the format of \code{filter_} statements and the
#' association between \code{filter_} and \code{df}. \cr \cr \code{filter2_}:
#' Variable argument statements for the row filtration against data in
#' secondary file(s) linked to \code{df2}. See also \code{\link{prnGSPAHM}} for
#' the format of \code{filter2_}, \code{normPSM} for the association between
#' \code{filter_} and \code{df}. \cr \cr Additional parameters for plotting:
#' \cr \code{xco}, the cut-off lines of fold changes at position \code{x}; the
#' default is at \eqn{-1.2} and \eqn{+1.2}. \cr \code{yco}, the cut-off line of
#' \code{pVal} at position \code{y}; the default is \eqn{0.05}. \cr
#' \code{width}, the width of plot; \cr \code{height}, the height of plot. \cr
#' \code{nrow}, the number of rows in a plot.
#'
#'@import dplyr ggplot2
#'@importFrom magrittr %>% %T>% %$% %<>%
#'
#'@example inst/extdata/examples/prnVol_.R
#'
#'@seealso
#' \emph{Metadata} \cr
#' \code{\link{load_expts}} for metadata preparation and a reduced working example in data normalization \cr
#'
#' \emph{Data normalization} \cr
#' \code{\link{normPSM}} for extended examples in PSM data normalization \cr
#' \code{\link{PSM2Pep}} for extended examples in PSM to peptide summarization \cr
#' \code{\link{mergePep}} for extended examples in peptide data merging \cr
#' \code{\link{standPep}} for extended examples in peptide data normalization \cr
#' \code{\link{Pep2Prn}} for extended examples in peptide to protein summarization \cr
#' \code{\link{standPrn}} for extended examples in protein data normalization. \cr
#' \code{\link{purgePSM}} and \code{\link{purgePep}} for extended examples in data purging \cr
#' \code{\link{pepHist}} and \code{\link{prnHist}} for extended examples in histogram visualization. \cr
#' \code{\link{extract_raws}} and \code{\link{extract_psm_raws}} for extracting MS file names \cr
#'
#' \emph{Variable arguments of `filter_...`} \cr
#' \code{\link{contain_str}}, \code{\link{contain_chars_in}}, \code{\link{not_contain_str}},
#' \code{\link{not_contain_chars_in}}, \code{\link{start_with_str}},
#' \code{\link{end_with_str}}, \code{\link{start_with_chars_in}} and
#' \code{\link{ends_with_chars_in}} for data subsetting by character strings \cr
#'
#' \emph{Missing values} \cr
#' \code{\link{pepImp}} and \code{\link{prnImp}} for missing value imputation \cr
#'
#' \emph{Informatics} \cr
#' \code{\link{pepSig}} and \code{\link{prnSig}} for significance tests \cr
#' \code{\link{pepVol}} and \code{\link{prnVol}} for volcano plot visualization \cr
#' \code{\link{prnGSPA}} for gene set enrichment analysis by protein significance pVals \cr
#' \code{\link{gspaMap}} for mapping GSPA to volcano plot visualization \cr
#' \code{\link{prnGSPAHM}} for heat map and network visualization of GSPA results \cr
#' \code{\link{prnGSVA}} for gene set variance analysis \cr
#' \code{\link{prnGSEA}} for data preparation for online GSEA. \cr
#' \code{\link{pepMDS}} and \code{\link{prnMDS}} for MDS visualization \cr
#' \code{\link{pepPCA}} and \code{\link{prnPCA}} for PCA visualization \cr
#' \code{\link{pepLDA}} and \code{\link{prnLDA}} for LDA visualization \cr
#' \code{\link{pepHM}} and \code{\link{prnHM}} for heat map visualization \cr
#' \code{\link{pepCorr_logFC}}, \code{\link{prnCorr_logFC}}, \code{\link{pepCorr_logInt}} and
#' \code{\link{prnCorr_logInt}} for correlation plots \cr
#' \code{\link{anal_prnTrend}} and \code{\link{plot_prnTrend}} for trend analysis and visualization \cr
#' \code{\link{anal_pepNMF}}, \code{\link{anal_prnNMF}}, \code{\link{plot_pepNMFCon}},
#' \code{\link{plot_prnNMFCon}}, \code{\link{plot_pepNMFCoef}}, \code{\link{plot_prnNMFCoef}} and
#' \code{\link{plot_metaNMF}} for NMF analysis and visualization \cr
#'
#' \emph{Custom databases} \cr
#' \code{\link{Uni2Entrez}} for lookups between UniProt accessions and Entrez IDs \cr
#' \code{\link{Ref2Entrez}} for lookups among RefSeq accessions, gene names and Entrez IDs \cr
#' \code{\link{prepGO}} for \code{\href{http://current.geneontology.org/products/pages/downloads.html}{gene
#' ontology}} \cr
#' \code{\link{prepMSig}} for \href{https://data.broadinstitute.org/gsea-msigdb/msigdb/release/7.0/}{molecular
#' signatures} \cr
#' \code{\link{prepString}} and \code{\link{anal_prnString}} for STRING-DB \cr
#'
#' \emph{Column keys in PSM, peptide and protein outputs} \cr
#' system.file("extdata", "psm_keys.txt", package = "proteoQ") \cr
#' system.file("extdata", "peptide_keys.txt", package = "proteoQ") \cr
#' system.file("extdata", "protein_keys.txt", package = "proteoQ") \cr
#'
#'@export
gspaMap <- function (gset_nms = c("go_sets", "c2_msig", "kinsub"),
scale_log2r = TRUE, complete_cases = FALSE,
impute_na = FALSE, df = NULL, df2 = NULL,
filepath = NULL, filename = NULL, fml_nms = NULL,
adjP = FALSE, topn_labels = 20,
show_sig = "none",
gspval_cutoff = 5E-2, gslogFC_cutoff = log2(1.2),
topn_gsets = Inf, theme = NULL, ...)
{
check_dots(c("id", "anal_type"), ...)
check_depreciated_args(list(c("pval_cutoff", "gspval_cutoff"),
c("logFC_cutoff", "gslogFC_cutoff"),
c("show_labels", "topn_labels")), ...)
id <- match_call_arg(normPSM, group_pep_by)
stopifnot(rlang::as_string(id) %in% c("prot_acc", "gene"),
length(id) == 1L)
scale_log2r <- match_prnSig_scale_log2r(scale_log2r = scale_log2r,
impute_na = impute_na)
df <- rlang::enexpr(df)
df2 <- rlang::enexpr(df2)
filepath <- rlang::enexpr(filepath)
filename <- rlang::enexpr(filename)
show_sig <- rlang::as_string(rlang::enexpr(show_sig))
stopifnot(show_sig %in% c("none", "pVal", "qVal"),
length(show_sig) == 1L)
check_gset_nms(gset_nms)
reload_expts()
info_anal(df = !!df,
df2 = !!df2,
id = !!id,
filepath = !!filepath,
filename = !!filename,
scale_log2r = scale_log2r,
complete_cases = complete_cases,
impute_na = impute_na,
anal_type = "mapGSPA")(fml_nms = fml_nms,
adjP = adjP,
topn_labels = topn_labels,
gspval_cutoff = gspval_cutoff,
gslogFC_cutoff = gslogFC_cutoff,
topn_gsets = topn_gsets,
show_sig = show_sig,
gset_nms = gset_nms,
theme = theme,
...)
}
GeomTable <- ggproto(
"GeomTable",
Geom,
required_aes = c("x", "y", "table"),
default_aes = aes(widthx = 10, widthy = 10, rownames = NA),
draw_key = draw_key_blank,
draw_panel = function(data, panel_scales, coord) {
if (nrow(data) != 1) {
stop(sprintf("only one table per panel allowed, got %s (%s)",
nrow(data),
as.character(data)),
call. = FALSE)
}
wx = data$widthx / 2
wy = data$widthy / 2
corners <- data.frame(x = c(data$x - wx, data$x + wx), y = c(data$y - wy, data$y + wy))
d <- coord$transform(corners, panel_scales)
table = read.csv(text = data$table, header = TRUE)
if (!is.na(data$rownames)) {
rownames(table) <-
unlist(strsplit(data$rownames, "|", fixed = TRUE))
}
x_rng <- range(d$x, na.rm = TRUE)
y_rng <- range(d$y, na.rm = TRUE)
vp <- grid::viewport(x = mean(x_rng), y = mean(y_rng),
width = diff(x_rng), height = diff(y_rng),
just = c("center", "center"))
grob <- gridExtra::tableGrob(table, rows = NULL, cols = NULL,
theme =
gridExtra::ttheme_minimal(core = list(fg_params=list(cex = .7)),
colhead = list(fg_params=list(cex = .7),
parse=TRUE),
rowhead = list(fg_params=list(cex = .7))))
grob$heights <- grob$heights*.6
## add a line across the header
# grob <- gtable_add_grob(
# grob,
# grobs = segmentsGrob(y1 = unit(0, "npc"), gp = gpar(lwd = 2.0)),
# t = 1,
# b = 1,
# l = 1,
# r = ncol(d) + 1
# )
grid::editGrob(grob, vp = vp, name = paste(grob$name, facet_id()))
}
)
facet_id <- local({
i <- 1
function() {
i <<- i + 1
i
}
})
#' Prints table in ggplot2 images
#'
#' @param mapping The same as ggplot2.
#' @param data same as ggplot2.
#' @param stat The same as ggplot2.
#' @param position same as ggplot2.
#' @param na.rm The same as ggplot2.
#' @param show.legend same as ggplot2.
#' @param inherit.aes The same as ggplot2.
#' @param ... same as ggplot2.
#' @export
geom_table <- function(mapping = NULL, data = NULL, stat = "identity",
position = "identity", na.rm = FALSE, show.legend = NA,
inherit.aes = TRUE, ...)
{
layer(geom = GeomTable,
mapping = mapping,
data = data,
stat = stat,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)
}
#' Converts a data frame column to a csv vector
#'
#' @param x A data frame column.
to_csv_ <- function(x)
{
paste(capture.output(write.csv(x, stdout(), row.names = F)), collapse = "\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.