##
## All plotting functions for computeReport() [main function]
##
#'
#' Plot contaminants from proteinGroups.txt
#'
#' @param data A data.frame with columns 'group', 'cont_pc', 'logAbdClass'
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' data = data.frame( 'group' = letters[1:10], 'cont_pc' = 2:11, 'logAbdClass' = c("low","high"))
#' plot_ContsPG(data)
#'
plot_ContsPG = function(data)
{
data$section = as.integer(seq(0, nrow(data)/correctSetSize(nrow(data),30)*0.999, length.out=nrow(data)))
p = ggplot(data=data, aes_string(x = "group", y = "cont_pc", alpha="logAbdClass")) +
suppressWarnings(## supresses 'Using alpha for a discrete variable is not advised'
scale_alpha_discrete(range = c(c(0.3, 1)[(length(unique(data$logAbdClass))==1) + 1], 1.0), ## ordering of range is critical!
name = "Abundance\nclass")) +
geom_col() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
xlab("") +
ggtitle("PG: Contaminant per condition") +
ylab("contaminant (% intensity)") +
geom_hline(aes_string(yintercept = "5"), linetype = 'dashed')
if (length(unique(data$section))>1) p = p + facet_wrap(~ section, ncol = 1, scales="free_x")
#print(p)
return (p)
}
#'
#' Plot user-defined contaminants from evidence.txt
#'
#' Kolmogorov-Smirnoff p-values are plotted on top of each group.
#' High p-values indicate that Andromeda scores for contaminant peptides
#' are equal or higher compared to sample peptide scores, i.e. the probability that
#' sample peptides scores are NOT greater than contaminant peptide scores.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'variable', 'value'
#' @param name_contaminant Name of the contaminant shown in title
#' @param extra_limit Position where a h-line is plotted (for visual guidance)
#' @param subtitle Optional subtitle for plot
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' data = data.frame(fc.raw.file = letters[1:3],
#' variable = c(rep("spectralCount", 3),
#' rep("intensity", 3),
#' rep("above.thresh", 3),
#' rep("score_KS", 3)),
#' value = c(10, 20, 15, 9, 21, 14, 0, 1, 1, 0.3, 0.01, 0.04))
#' plot_ContUser(data, "myco", 5, "subtitle")
#'
plot_ContUser = function(data, name_contaminant, extra_limit, subtitle = NULL)
{
datav = subset(data, data$variable %in% c('spectralCount', "intensity"))
datav$section = assignBlocks(datav$fc.raw.file, set_size = 40, sort_values = TRUE)
dataAT = subset(data, data$variable %in% c('above.thresh'))
## contRaws might be empty
contRaws = dataAT$fc.raw.file[ dataAT$value == TRUE]
dataKS = subset(data, data$variable == 'score_KS' & (data$fc.raw.file %in% contRaws))
if (nrow(dataKS)>0) {
dataKS$value = paste0("p = ", round(dataKS$value,2))
## use the same section, so ggplot knows how to subset the data
dataKS$section = datav$section[match(dataKS$fc.raw.file, datav$fc.raw.file)]
}
#cat(paste0("CA entry is ", extra_limit, "\n"))
maxY = max(datav$value, extra_limit)
p = ggplot(datav, aes_string(x = "fc.raw.file", y = "value")) +
geom_col(aes_string(fill = "variable"), position = "dodge", width=.7) +
ggtitle(paste0("EVD: Contaminant '", name_contaminant, "'"), subtitle) +
xlab("") +
ylab("abundance fraction (%)") +
ylim(c(0, maxY * 1.1)) +
theme(plot.title = element_text(colour = "red"),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
scale_fill_discrete(name = "Method") +
geom_hline(yintercept = extra_limit, linetype = 'dashed')
## group(NULL) seems important in geom_text()
if (nrow(dataKS)>0) p = p + geom_text(data = dataKS, aes_string(label = "value", y = maxY * 1.05, group = NULL))
p = p + facet_wrap(~ section, ncol = 1, scales = "free_x")
#print(p)
return(p)
}
#'
#' Plot Andromeda score distribution of contaminant peptide vs. matrix peptides.
#'
#' The data is expected to be an ECDF already, x being the Andromeda score, y being the culmulative probability.
#' The Score is the probability of a Kolm.-Smirnoff test that the contaminant scores are larger (i.e.
#' large p-values indicate true contamination).
#' You will only see this plot if the %-threshold (YAML config) was reached. This is a saveguard against false-positive,
#' but high-scoring contaminant peptides, which would erroneously give you a large p-value and make you believe
#' your sample is contaminated although that's not the case.
#'
#' @param data A data.frame with columns 'x', 'y', 'condition'
#' @param raw.file Name of Raw file for which the data is displayed (will become part of the plot title)
#' @param score Score of how distinct the distributions are (will become part of the title)
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' data = data.frame(x = 10:60,
#' y = c(seq(0,1,length=51), seq(0.1, 1, length=51)),
#' condition = rep(c("sample","contaminant"), each=51))
#' plot_ContUserScore(data, 'test file', 0.96)
#'
plot_ContUserScore = function(data, raw.file, score) {
p = ggplot(data) +
geom_line(aes_string(x = "x", y = "y", col = "condition")) +
ggtitle(paste0("Empirical CDF of '", raw.file, "'\np = ", round(score, 2))) +
ylab("Pr") +
xlab("Andromeda score")
return(p)
}
#'
#' Plot contaminants from evidence.txt, broken down into top5-proteins.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'contaminant', 'pname', 'intensity'
#' @param top5 Name of the Top-5 Proteins (by relative intensity or whatever seems relevant)
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' data = data.frame(intensity = 1:12,
#' pname = rep(letters[1:3], 4),
#' fc.raw.file = rep(paste("f", 1:4), each=3),
#' contaminant = TRUE)
#' ## providing more proteins than present... d,e will be ignored
#' plot_ContEVD(data, top5 = letters[1:5])
#' ## classify 'c' as 'other'
#' plot_ContEVD(data, top5 = letters[1:2])
#'
plot_ContEVD = function(data, top5)
{
#top5 = cont.top5.names
if (is.null(top5)) stop("Function plot_ContEVD() called with invalid argument. Please report this bug.")
if (length(top5) > 5) stop("Top5 protein list is longer than 5 (which is the maximum allowed).")
intensity = NULL ## to make R CHECK happy...
data.sub = data[data$contaminant > 0,]
## rewrite prot names, and subsume 6th and below as 'other'
data.sub$pname = as.character(data.sub$pname)
data.sub[!(data.sub$pname %in% top5), "pname"] = 'other'
## aggregate identical proteins
## use sum(as.numeric(.)) to prevent overflow
d_sum = plyr::ddply(data.sub[, c("intensity", "pname", "fc.raw.file")], c("pname", "fc.raw.file"),
function(x) plyr::summarise(x, s.intensity=sum(as.numeric(intensity), na.rm = TRUE)))
## normalize by total intensity of raw file
d_norm = plyr::ddply(data[, c("intensity", "fc.raw.file")], "fc.raw.file",
function(x) plyr::summarise(x, total.intensity=sum(as.numeric(intensity), na.rm = TRUE)))
d_sum$total.intensity = d_norm$total.intensity[match(d_sum$fc.raw.file, d_norm$fc.raw.file)]
d_sum$Log10Diff = getAbundanceClass(log10(d_sum$total.intensity))
d_sum$s.intensity = d_sum$s.intensity / d_sum$total.intensity * 100
## shorten protein-groups (at most two protein names)
d_sum$pname = sapply(d_sum$pname, function(x) {
p.split = unlist(strsplit(x, split=";"))
## shorten entries as well (at most 15 characters)
p.split_s = sapply(p.split[1:(min(2, length(p.split)))], function(x) ifelse(nchar(x)>15, paste0(substr(x, start=1, stop=13), ".."), x))
r = paste(p.split_s, sep="", collapse=";")
if (length(p.split)>2) r=paste0(r, ";..")
return(r)
})
## order of pname determines order of bars
d_sum = rbind(d_sum[d_sum$pname!="other",], d_sum[d_sum$pname=="other",])
## value of factors determines order in the legend
## --> make proteins a factor, with 'other' being the first
d_sum$Protein = factor(d_sum$pname, levels = unique(c("other", d_sum$pname)), ordered = TRUE)
head(d_sum)
## plot
p = ggplot(d_sum, aes_string( x = "fc.raw.file",
y = "s.intensity",
fill = "Protein")) +
geom_col(aes_string(alpha = "Log10Diff")) +
suppressWarnings(## suppresses 'Using alpha for a discrete variable is not advised'
scale_alpha_discrete(range = c(c(0.3, 1)[(length(unique(d_sum$Log10Diff))==1) + 1], 1.0),
name = "Abundance\nclass")) +
xlab("") +
theme_bw() +
ggtitle("EVD: Top5 Contaminants per Raw file") +
ylab("contaminant (% intensity)") +
scale_fill_manual(values = RColorBrewer::brewer.pal(6,"Accent")) +
scale_colour_manual(values = RColorBrewer::brewer.pal(6,"Accent")) +
geom_hline(aes_string(yintercept = "5"), linetype='dashed') +
#guides(alpha=NULL, fill = guide_legend(nrow = 2, ncol = 3, byrow = TRUE, reverse = TRUE)) +
#theme(legend.position="top", legend.title=element_blank()) +
coord_flip() +
scale_x_discrete_reverse(d_sum$fc.raw.file)
#print(p)
return(p)
}
#'
#' Plot ratios of labeled data (e.g. SILAC) from proteinGroups.txt
#'
#' The 'x' values are expected to be log2() transformed already.
#'
#' @param df_ratios A data.frame with columns 'x', 'y', 'col', 'ltype'
#' @param d_range X-axis range of plot
#' @param main_title Plot title
#' @param main_col Color of title
#' @param legend_title Legend text
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' x1 = seq(-3, 3, by = 0.1)
#' y1 = dnorm(x1)
#' x2 = seq(-5, 1, by = 0.1)
#' y2 = dnorm(x2, mean = -1)
#' data = data.frame( x = c(x1,x2),
#' y = c(y1,y2),
#' col = c(rep("ok", length(x1)), rep("shifted", length(x2))),
#' ltype = c(rep("solid", length(x1)), rep("dotted", length(x2))))
#' plot_RatiosPG(data, range(data$x), "Ratio plot", "red", "group")
#'
plot_RatiosPG = function(df_ratios, d_range, main_title, main_col, legend_title)
{
br = c(2, 5, 10, 20);
p =
ggplot(data = df_ratios, aes_string(x = "x", y = "y", colour = "col")) +
facet_grid(col ~ ., scales = "free_y") +
geom_line(size = 1.2) +
geom_area(aes_string(alpha = "ltype", fill = "col")) +
xlab("ratio") +
ylab("density") +
scale_fill_manual(values = rep(RColorBrewer::brewer.pal(6,"Accent"), times=40), guide = guide_legend(legend_title)) +
scale_colour_manual(values = rep(RColorBrewer::brewer.pal(6,"Accent"), times=40)) +
suppressWarnings(scale_alpha_discrete(range = c(1, 0.2),
labels=c("dotted"="unimodal", "solid"="multimodal"),
guide = guide_legend("shape")
)) +
scale_x_continuous(limits = d_range, trans = "identity", breaks = c(-br, 0, br), labels=c(paste0("1/",2^(br)), 1, 2^br)) +
guides(color = "none") +
theme(plot.title = element_text(colour = main_col)) +
theme_bw() +
geom_vline(alpha = 0.5, xintercept = 0, colour = "green", linetype = "dashed", size = 1.5) +
ggtitle(main_title)
#print(p)
return (p)
}
#'
#' Plot Protein groups per Raw file
#'
#' The input is a data.frame with protein/peptide counts, where 'category' designates
#' the origin of information (genuine ID, transferred ID, or both).
#'
#' @param data A data.frame with columns 'fc.raw.file', 'counts', 'category'
#' @param y_max Plot limit of y-axis
#' @param thresh_line Position of a threshold line, indicating the usual target value
#' @param title Main title, and optional subtitle (if vector of length 2 is provided)
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' data = data.frame(fc.raw.file = rep(c("file A", "file B"), each=3),
#' counts = c(3674, 593, 1120, 2300, 400, 600),
#' category = c("genuine","genuine+transferred","transferred"))
#' plot_CountData(data, 6000, 4000, c("EVD: Protein Groups count", "gain: 23%"))
#'
plot_CountData = function(data, y_max, thresh_line, title)
{
title_main = title[1]
title_sub = ifelse(length(title) > 1, title[2], "")
p = ggplot(data, aes_string(x = "fc.raw.file", y = "counts", fill = "category")) +
geom_col(position = position_stack(reverse = TRUE)) +
xlab("") +
ylab("count") +
scale_x_discrete_reverse(data$fc.raw.file) +
ylim(0, y_max) +
scale_fill_manual(values = c("green", "#BEAED4", "blue")) +
ggtitle(title_main, title_sub) +
geom_abline(alpha = 0.5, intercept = thresh_line, slope = 0, colour = "black", linetype = "dashed", size = 1.5) +
coord_flip()
return(p)
}
#'
#' Plot RT peak width over time
#'
#' The input is a data.frame with already averaged counts over binned RT-slices.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'RT', 'peakWidth'
#' @param x_lim Plot range of x-axis
#' @param y_lim Plot range of y-axis
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' data = data.frame(fc.raw.file = rep(c("file A", "file B", "file C"), each=81),
#' RT = c(20:100),
#' peakWidth = c(rnorm(81, mean=20), rnorm(81, mean=10), rnorm(81, mean=30)))
#' plot_RTPeakWidth(data, c(10, 100), c(0, 40))
#'
plot_RTPeakWidth = function(data, x_lim, y_lim)
{
p = ggplot(data) +
geom_line(aes_string(x = "RT", y = "peakWidth", colour = "fc.raw.file"), size=1, alpha=0.7) +
scale_color_manual(values = brewer.pal.Safe(length(unique(data$fc.raw.file)), "Set1")) +
guides(color = guide_legend(title = "Raw file\n(avg. peak width)")) +
xlab("retention time [min]") +
ylab("peak width [min]") +
coord_cartesian(xlim = x_lim, ylim = y_lim) + ## zoom in y -- do not cut data (preserve lines)
ggtitle("EVD: Peak width over RT") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#print(p)
return(p)
}
#'
#' Plot MaxQuant Match-between-runs alignment performance.
#'
#' The plots shows the correction function applied by MaxQuant, and the
#' residual RT (ideally 0) of each peptide to its reference. Uncalibrated peptides
#' are shown in red, calibrated ones in green.
#' The MaxQuant RT correction which was applied prior is shown in blue. The range of this function
#' can give hints if the allowed RT search window (20min by default) is sufficient or if
#' MaxQuant should be re-run with more tolerant settings.
#'
#' The input is a data.frame with columns
#' 'calibrated.retention.time' - resulting (hopefully) calibratated RT after MQ-recal (the X-axis of the plot)
#' 'retention.time.calibration' - delta applied by MaxQuant
#' 'rtdiff' - remaining RT diff to reference peptide of the same sequence
#' 'RTdiff_in' - is the feature aligned (within 'match_tol')?
#' 'fc.raw.file_ext' - raw file
#' where each row represents one peptide whose RT was corrected by MaxQuant.
#'
#' @param data A data.frame with columns as described above
#' @param y_lim Plot range of y-axis
#' @param title_sub Subtitle
#' @param match_tol Maximal residual RT delta to reference (usually ~1 min)
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' data = data.frame(fc.raw.file_ext = "file A", ## more than one would be possible
#' calibrated.retention.time = c(20:100),
#' retention.time.calibration = 6 + sin((20:100)/10))
#' data$rtdiff = rnorm(nrow(data))
#' data$RTdiff_in = c("green", "red")[1 + (abs(data$rtdiff) > 0.7)]
#'
#' plot_MBRAlign(data, c(-10, 10), "fancy subtitle", 0.7)
#'
plot_MBRAlign = function(data, y_lim, title_sub, match_tol)
{
#data = evd_RT_t[ evd_RT_t$fc.raw.file == "file 13",]
p = ggplot(data, aes_string(x = "calibrated.retention.time", y = "retention.time.calibration")) +
## the MaxQuant correction (plot real data, no spline, since it can be very irregular)
geom_line(aes(alpha = 0.7), color = "blue") +
## PTXQC correction
geom_point(aes_string(x = "calibrated.retention.time", y = "rtdiff", color = "RTdiff_in"), alpha = 0.5) +
scale_alpha(name = 'Alignment function',
labels = list(expression("MaxQuant" ~ Delta*"RT")),
range = c(1,1)) +
scale_colour_manual(name = expression(bold("ID pairs ("*Delta*"RT to Ref)")),
values = c("green" = "green", "red" = "red"),
labels=c("green" = paste0("good (<", match_tol, "min)"),
"red" = paste0("bad (>", match_tol, "min)"))) +
guides(colour = guide_legend(order = 2),
alpha = guide_legend(order = 1)) + ## alpha-legend on top, color below
ylim(y_lim) +
xlab("corrected RT [min]") +
ylab(expression(Delta*"RT [min]")) +
facet_wrap(~ fc.raw.file_ext) +
ggtitle("EVD: MBR - alignment", title_sub)
#print(p)
return(p)
}
#'
#' Plot MaxQuant Match-between-runs id transfer performance.
#'
#' The plots shows the different categories of peak classes
#'
#' The input is a data.frame with columns
#' 'fc.raw.file' - raw file name
#' 'single' - fraction of peptides with are represent only once
#' 'multi.inRT' - fraction of peptides with are represent multiple times,
#' but within a certain RT peak width
#' 'multi.outRT' - fraction of peptides with are represent multiple times,
#' with large RT distance
#' 'sample' - raw file
#' where each row represents one peptide sequence.
#'
#' @param data A data.frame with columns as described above
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = rep(c("file A", "file B"), each = 3),
#' single = c(0.9853628, 0.8323160, 0.9438375,
#' 0.9825538, 0.8003763, 0.9329961),
#' multi.inRT = c(0.002927445, 0.055101018, 0.017593087,
#' 0.005636457, 0.099640044, 0.031870056),
#' multi.outRT = c(0.01170978, 0.11258294, 0.03856946,
#' 0.01180972, 0.09998363, 0.03513386),
#' sample = rep(c("genuine", "transferred", "all"), 2))
#' plot_MBRIDtransfer(data)
#'
plot_MBRIDtransfer = function(data)
{
data.m = reshape2::melt(data, id.vars=c("fc.raw.file", "sample"))
data.m$value = data.m$value * 100 ## used to be scores in [0-1]
if (all(is.na(data.m$value)))
{# the slice of Raw file we are looking at could have no MBR data -- and ggplot needs something to plot...
data.m$value = 0
}
p = ggplot(data.m) +
geom_col(aes_string(x="fc.raw.file", y="value", fill="variable"), position = position_stack(reverse = TRUE)) +
scale_fill_manual("peak class",
values = c("single"="green", "multi.inRT"="lightgreen", "multi.outRT"="red"),
labels=c("single", "group (in width)", "group (out width)")) +
ylim(0, 100.1) + ## ggplot might not show the last (red) group upon 100.0
xlab("") +
ylab("fraction of 3D-peaks [%]") +
coord_flip() +
scale_x_discrete_reverse(factor(data$fc.raw.file)) +
ggtitle("EVD: MBR - ID Transfer") +
facet_wrap(~sample)
#print(p)
return(p)
}
#'
#' Plot MaxQuant Match-between-runs id transfer performance.
#'
#' The plots shows the different categories of peak classes
#'
#' The input is a data.frame with columns
#' 'fc.raw.file' - raw file name
#' 'single' - fraction of peptides with are represent only once
#' 'multi.inRT' - fraction of peptides with are represent multiple times,
#' but within a certain RT peak width
#' 'multi.outRT' - fraction of peptides with are represent multiple times,
#' with large RT distance
#' 'sample' - raw file
#' where each row represents one peptide sequence.
#'
#' @param data A data.frame with columns as described above
#' @param title_sub Subtitle text
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = paste("file", letters[1:4]),
#' abs = c(5461, 5312, 3618, 502),
#' pc = c(34, 32, 22, 2))
#' plot_MBRgain(data, "MBR gain: 18%")
#'
plot_MBRgain = function(data, title_sub = "")
{
p = ggplot(data = data, aes_string(x = "abs", y = "pc", col = "fc.raw.file")) +
geom_point(size=2) +
ggtitle("EVD: Peptides inferred by MBR", title_sub) +
xlab("number of transferred ID's") +
ylab("gain on top of genuine IDs [%]") +
xlim(0, max(data$abs, na.rm = TRUE)*1.1) + ## accounting for labels near the border
ylim(0, max(data$pc, na.rm = TRUE)*1.1) +
guides(color = "none") +
geom_text(aes_string(hjust = -0.1, label = "fc.raw.file"), show.legend = FALSE, check_overlap = TRUE)
#print(p)
return(p)
}
#'
#' The plots shows the charge distribution per Raw file.
#' The output of 'mosaicize()' can be used directly.
#'
#' The input is a data.frame with columns
#' 'Var1' - name of the Raw file
#' 'Var2' - charge (used as fill color)
#' 'Var1_center' - contains X-position of the Raw file
#' 'Var2_height' - relative frequency of the charge
#' 'Margin_var1' -
#' where each row represents one peptide sequence.
#'
#' @param d_charge A data.frame with columns as described above
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(raw.file = c(rep('file A', 100), rep('file B', 40)),
#' data = c(rep(2, 60), rep(3, 30), rep(4, 10),
#' rep(2, 30), rep(3, 7), rep(4, 3)))
#' plot_Charge(mosaicize(data))
#'
plot_Charge = function(d_charge)
{
p = ggplot(d_charge, aes_string(x = "Var1_center", y = "Var2_height", width = "Margin_var1")) +
geom_col(aes_string(fill = "Var2"), color = "black", position = position_stack(reverse = TRUE)) +
geom_text(aes_string(label = "Var1", x = "Var1_center", y = 1.05)) +
xlab("Raw file") +
ylab("fraction [%]") +
guides(fill = guide_legend(title="charge"),
color = "none") + # avoid black line in legend
scale_x_reverse() +
coord_flip() +
theme(axis.text.y = element_blank(), axis.ticks = element_blank()) +
ggtitle("EVD: charge distribution")
#print(p)
return(p)
}
#'
#' Plot IDs over time for each Raw file.
#'
#' Uses plot_DataOverRT() internally.
#'
#' @param data A data.frame with columns as described above
#' @param x_lim Limits of the x-axis (2-tuple)
#' @param y_max Maximum of the y-axis (single value)
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = rep(paste('file', letters[1:3]), each=30),
#' RT = seq(20, 120, length.out = 30),
#' counts = c(rnorm(30, 400, 20), rnorm(30, 250, 15), rnorm(30, 50, 15)))
#' plot_IDsOverRT(data)
#'
plot_IDsOverRT = function(data, x_lim = range(data$RT), y_max = max(data$counts))
{
return(plot_DataOverRT(data, "EVD: IDs over RT", "ID count", x_lim, y_max))
}
#'
#' Plot some count data over time for each Raw file.
#'
#' The input is a data.frame with columns
#' 'RT' - RT in seconds, representing one bin
#' 'counts' - number of counts at this bin
#' 'fc.raw.file' - name of the Raw file
#' where each row represents one bin in RT.
#'
#' At most nine(!) Raw files can be plotted. If more are given,
#' an error is thrown.
#'
#'
#' @param data A data.frame with columns as described above
#' @param title The plot title
#' @param y_lab Label of y-axis
#' @param x_lim Limits of the x-axis (2-tuple)
#' @param y_max Maximum of the y-axis (single value)
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = rep(paste('file', letters[1:3]), each=30),
#' RT = seq(20, 120, length.out = 30),
#' counts = c(rnorm(30, 400, 20), rnorm(30, 250, 15), rnorm(30, 50, 15)))
#' plot_DataOverRT(data, "some title", "count data")
#'
plot_DataOverRT = function(data, title, y_lab, x_lim = range(data$RT), y_max = max(data$counts))
{
nrOfRaws = length(unique(data$fc.raw.file))
p = ggplot(data = data) +
geom_line(aes_string(x = "RT", y = "counts", colour = "fc.raw.file", linetype = "fc.raw.file")) +
xlim(x_lim) +
xlab("RT [min]") +
ylim(from = 0, to = y_max) +
ylab(y_lab) +
ggtitle(title) +
guides(colour = guide_legend(title="Raw file"), linetype = "none") +
scale_linetype_manual(values = rep_len(c("solid", "dashed"), nrOfRaws)) +
scale_color_manual(values = brewer.pal.Safe(nrOfRaws, "Set1"))
#print(p)
return(p)
}
#'
#' Plot percent of identified MS/MS for each Raw file.
#'
#' Useful for a first overall impression of the data.
#'
#' The input is a data.frame with columns
#' 'fc.raw.file' - name of the Raw file
#' 'ms.ms.identified....' - fraction of identified MS/MS spectra in percent
#' 'cat' - identification category as arbitrary string
#' where each row represents one Raw file.
#'
#' @param data A data.frame with columns as described above
#' @param id_rate_bad Number below which the ID rate is considered bad
#' @param id_rate_great Number above which the ID rate is considered great
#' @param label_ID Named vector with colors for the categories given in data$cat
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' id_rate_bad = 20; id_rate_great = 35;
#' label_ID = c("bad (<20%)" = "red", "ok (...)" = "blue", "great (>35%)" = "green")
#' data = data.frame(fc.raw.file = paste('file', letters[1:3]),
#' ms.ms.identified.... = rnorm(3, 25, 15))
#' data$cat = factor(cut(data$ms.ms.identified....,
#' breaks=c(-1, id_rate_bad, id_rate_great, 100),
#' labels=names(label_ID)))
#' plot_IDRate(data, id_rate_bad, id_rate_great, label_ID)
#'
plot_IDRate = function(data, id_rate_bad, id_rate_great, label_ID)
{
p = ggplot(data, aes_string(y = "fc.raw.file", x = "ms.ms.identified....")) +
geom_point(aes_string(colour = "cat")) +
geom_vline(xintercept = id_rate_bad, color=(label_ID)[1]) +
geom_vline(xintercept = id_rate_great, color=(label_ID)[3]) +
ylab("") +
xlab("MS/MS identified [%]") +
scale_colour_manual(values=label_ID) +
ggtitle("SM: MS/MS identified per Raw file") +
xlim(0, max(data$ms.ms.identified...., id_rate_great)*1.1) +
guides(color = guide_legend(title="ID class")) +
scale_y_discrete_reverse(data$fc.raw.file, breaks = ggAxisLabels)
#print(p)
return(p)
}
#'
#' Colored table plot.
#'
#' Code taken from http://stackoverflow.com/questions/23819209/change-text-color-for-cells-using-tablegrob-in-r
#'
#' @param data Table as Data.frame
#' @param colours Single or set of colours (col-wise)
#' @param fill Cell fill (row-wise)
#' @param just (ignored)
#' @return gTable
#'
plotTableRaw = function(data, colours="black", fill=NA, just="centre")
{
label_matrix = as.matrix(data)
nc = ncol(label_matrix)
nr = nrow(label_matrix)
n = nc*nr
colours <- rep(colours, length.out = n)
fill <- rep(fill, length.out = n)
justs <- rep(just, length.out = n)
## text for each cell
labels <- lapply(seq_len(n), function(ii)
grid::textGrob(as.character(label_matrix[ii]), gp = grid::gpar(fontsize=8, col=colours[ii]), just="left", x = grid::unit(0.05, "npc")))
label_grobs <- matrix(labels, ncol=nc)
## define the fill background of cells
fill <- lapply(seq_len(n), function(ii)
grid::rectGrob(gp = grid::gpar(fill=fill[ii])))
## some calculations of cell sizes
row_heights <- function(m){
do.call(grid::unit.c, apply(m, 1, function(l)
max(do.call(grid::unit.c, lapply(l, grid::grobHeight)))))
}
col_widths <- function(m){
do.call(grid::unit.c, apply(m, 2, function(l)
max(do.call(grid::unit.c, lapply(l, grid::grobWidth)))))
}
## place labels in a gtable
g <- gtable::gtable_matrix("table", grobs = label_grobs,
widths = col_widths(label_grobs) + grid::unit(2,"mm"),
heights = row_heights(label_grobs) + grid::unit(2,"mm"))
## add the background
xt <- rep(seq_len(nr), each=nc)
xl <- rep(seq_len(nc), times=nr)
g <- gtable::gtable_add_grob(g, fill, t=xt, l=xl, z=0, name="fill")
return(g)
}
#'
#' Create an HTML table with an extra header row
#'
#'
#' @param data A data.frame which serves as table
#' @param caption A set of headlines, e.g. c("top line", "bottom line")
#' @return table as html character string for cat()'ing into an html document
#'
#' @import htmlTable
#' @import magrittr
#'
#' @export
#'
#' @examples
#' data = data.frame(raw.file = letters[1:4],
#' id.rate = 3:6)
#' getHTMLTable(data,
#' caption = "some header line")
#'
getHTMLTable = function(data, caption = NA)
{
tbl = htmlTable::addHtmlTableStyle(data,
align = 'l', ## align columns left
col.rgroup = c("none", "#F7F7F7"))
tbl = htmlTable::htmlTable(tbl, rnames = FALSE, ## no row names
caption = caption)
return(tbl)
}
#'
#' Plot a table with row names and title
#'
#' Restriction: currently, the footer will be cropped at the table width.
#'
#' @param data A data.frame with columns as described above
#' @param title Table title
#' @param footer Footer text
#' @param col_names Column names for Table
#' @param fill Fill pattern (by row)
#' @param col Text color (by column)
#' @param just (ignored)
#' @return gTree object with class 'PTXQC_table'
#'
#' @export
#'
#' @examples
#' data = data.frame(raw.file = letters[1:4],
#' id.rate = 3:6)
#' plotTable(data,
#' title = "Bad files",
#' footer = "bottom",
#' col_names = c("first col", "second col"),
#' col=c("red", "green"))
#'
plotTable = function(data, title = "", footer = "", col_names = colnames(data), fill = c("grey90", "grey70"), col = "black", just="centre")
{
## add column names
data2 = rbind(col_names, apply(data, 2, function(x) as.character(x)))
## create table
n = nrow(data2)*ncol(data2)
nd = nrow(data)*ncol(data)
table = plotTableRaw(data2,
fill = c(rep("grey50", ncol(data)), rep(fill, each=ncol(data), length.out=nd)), ## row-wise
colours = unlist(lapply(col, function(cc) c("black", rep(cc, nrow(data))))), ## col-wise
just = c(rep("centre", ncol(data)), rep(just, each=nrow(data), length.out=nd)))
colhead = lapply(col_names, function(ii) grid::textGrob(ii, gp = grid::gpar(fontsize=12, col="black", fontface="bold", fill="grey")))
## replace column names
table = gtable::gtable_add_grob(table, colhead, t = 1, l = 1:ncol(data))
if (nchar(title[1]) > 0)
{
gtitle = grid::textGrob(title, gp = grid::gpar(fontsize = 14))
padding = grid::unit(1.5, "line")
## add heading (white space)
table = gtable::gtable_add_rows(table, heights = grid::grobHeight(gtitle) + padding, pos = 0)
## add heading (text as overlay)
table = gtable::gtable_add_grob(table, list(gtitle), t = 1, l = 1, r = ncol(table), clip = "off")
}
if (nchar(footer[1]) > 0)
{
gfooter = grid::textGrob(footer, gp = grid::gpar(fontsize = 10))
padding = grid::unit(1.5, "line")
## add heading (white space)
table = gtable::gtable_add_rows(table, heights = grid::grobHeight(gfooter) + padding, pos = -1) ## bottom
## add heading (text as overlay)
table = gtable::gtable_add_grob(table, list(gfooter), t = nrow(table), l = 1, r = ncol(table), clip = "off")
}
## neat trick to enable calling print(g), to mimic ggplot-behaviour on this object
## in combination with print.PTXQC_table() -- see below
p = grid::gTree(children = grid::gList(table), cl = c("PTXQC_table"))
## hide the table name inside (for qcMetric::getTitles())
p$labels$title = title
#print(p)
return(p)
}
#' helper S3 class, enabling print(some-plot_Table-object)
#' @param x Some Grid object to plot
#' @param ... Further arguments (not used, but required for consistency with other print methods)
#' @return NULL
#'
#' @export
#'
print.PTXQC_table = function(x, ...) {
grid::grid.newpage();
grid::grid.draw(x)
return(NULL)
}
#'
#' A boxplot of uncalibrated mass errors for each Raw file.
#'
#' Boxes are optionally colored to indicate that a MQ bug was detected or
#' if PTXQC detected a too narrow search window.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'uncalibrated.mass.error..ppm.'
#' @param MQBug_raw_files List of Raw files with invalid calibration values
#' @param stats A data.frame with columns 'fc.raw.file', 'sd', 'outOfCal'
#' @param y_lim Range of y-axis
#' @param extra_limit Position where a v-line is plotted (for visual guidance)
#' @param title_sub Subtitle
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' n = c(150, 1000, 1000, 1000)
#' data = data.frame(fc.raw.file = repEach(letters[4:1], n),
#' uncalibrated.mass.error..ppm. = c(rnorm(n[1], 13, 2.4),
#' rnorm(n[2], 1, 0.5),
#' rnorm(n[3], 3, 0.7),
#' rnorm(n[4], 4.5, 0.8)))
#' stats = data.frame(fc.raw.file = letters[4:1],
#' sd_uncal = c(2.4, 0.5, 0.7, 0.8),
#' outOfCal = c(TRUE, FALSE, FALSE, FALSE))
#' plot_UncalibratedMSErr(data, MQBug_raw_files = letters[1],
#' stats, y_lim = c(-20,20), 15, "subtitle")
#'
plot_UncalibratedMSErr = function(data, MQBug_raw_files, stats, y_lim, extra_limit, title_sub)
{
data$col = "default"
if (length(MQBug_raw_files) > 0)
{
data$col[data$fc.raw.file %in% MQBug_raw_files] = "MQ bug"
}
## add 'out-of-calibration' Raw files:
data$col[data$fc.raw.file %in% stats$fc.raw.file[stats$outOfCal]] = "out-of-search-tol"
## only show legend if special things happen
showColLegend = ifelse(length(setdiff(data$col, "default")) > 0, "legend", "none")
## amend SD to fc.raw.file
stats$fcr_new_lvl = paste0(stats$fc.raw.file, " (sd = ", stats$sd_uncal, "ppm)")
## use augmented name
data$fc.raw.file_ext = stats$fcr_new_lvl[ match(data$fc.raw.file, stats$fc.raw.file) ]
cols_sub = c("default"="black", "MQ bug"="red", "out-of-search-tol"="orange")
cols_sub = cols_sub[names(cols_sub) %in% data$col]
p = ggplot(data, col=data$col) +
geom_boxplot(aes_string(x = "fc.raw.file_ext", y = "uncalibrated.mass.error..ppm.", col="col"), varwidth = TRUE, outlier.shape = NA) +
scale_colour_manual("", values = cols_sub, guide = showColLegend) +
ylab(expression(Delta~"mass [ppm]")) +
xlab("") +
ylim(y_lim) +
#scale_x_discrete_reverse(data$fc.raw.file_ext) +
scale_x_discrete(limits=rev) +
geom_hline(yintercept = c(-extra_limit, extra_limit),
colour="red",
linetype = "longdash") + ## == vline for coord_flip
coord_flip() +
ggtitle("EVD: Uncalibrated mass error", title_sub)
#print(p)
return(p)
}
#'
#' Plot bargraph of uncalibrated mass errors for each Raw file.
#'
#' Boxes are optionally colored to indicate that a MQ bug was detected or
#' if PTXQC detected a too narrow search window.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'mass.error..ppm.'
#' @param MQBug_raw_files List of Raw files with invalid calibration values
#' @param stats A data.frame with columns 'fc.raw.file', 'outOfCal'
#' @param y_lim Range of y-axis
#' @param extra_limit Position where a v-line is plotted (for visual guidance)
#' @param title_sub Subtitle
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' n = c(150, 1000, 1000, 1000)
#' data = data.frame(fc.raw.file = repEach(letters[4:1], n),
#' mass.error..ppm. = c(rnorm(n[1], 1, 2.4),
#' rnorm(n[2], 0.5, 0.5),
#' rnorm(n[3], 0.1, 0.7),
#' rnorm(n[4], 0.3, 0.8)))
#' stats = data.frame(fc.raw.file = letters[4:1],
#' sd = c(2.4, 0.5, 0.7, 0.8),
#' outOfCal = c(TRUE, FALSE, FALSE, FALSE))
#' plot_CalibratedMSErr(data, MQBug_raw_files = letters[1], stats, y_lim = c(-20,20), 15, "subtitle")
#'
plot_CalibratedMSErr = function(data, MQBug_raw_files, stats, y_lim, extra_limit = NA, title_sub = "")
{
data$col = "default"
if (length(MQBug_raw_files) > 0) {
data$col = c("default", "MQ bug")[(data$fc.raw.file %in% MQBug_raw_files) + 1]
data$mass.error..ppm.[data$fc.raw.file %in% MQBug_raw_files] = 0
if (all(data$mass.error..ppm.==0)) data$mass.error..ppm. = rnorm(nrow(data), sd=0.0001, mean=mean(y_lim))
}
## add 'out-of-calibration' Raw files:
data$col[data$fc.raw.file %in% stats$fc.raw.file[stats$outOfCal]] = "out-of-search-tol"
## only show legend if special things happen
showColLegend = ifelse(length(setdiff(data$col, "default")) > 0, "legend", "none")
cols_sub = c("default"="black", "MQ bug"="red", "out-of-search-tol"="orange")
cols_sub = cols_sub[names(cols_sub) %in% data$col]
## plot
p = ggplot(data, col = data$col) +
geom_boxplot(aes_string(x = "fc.raw.file", y = "mass.error..ppm.", col="col"), varwidth = TRUE, outlier.shape = NA) +
scale_colour_manual("", values = cols_sub, guide = showColLegend) +
ylab(expression(Delta~"mass [ppm]")) +
xlab("") +
ylim(y_lim) +
#scale_x_discrete_reverse(data$fc.raw.file) +
scale_x_discrete(limits=rev) +
coord_flip() +
ggtitle("EVD: Calibrated mass error", title_sub)
if (!is.na(extra_limit)) {
p = p + geom_hline(yintercept = c(-extra_limit, extra_limit), colour="red", linetype = "longdash") ## == vline for coord_flip
}
#print(p)
return (p)
}
#'
#' Plot bargraph of oversampled 3D-peaks.
#'
#' Per Raw file, at most three n's must be given, i.e.
#' the fraction of 3D-peaks for n=1, n=2 and n=3(or more).
#' The fractions must sum to 1 (=100%).
#'
#'
#' @param data A data.frame with columns 'fc.raw.file', 'n', 'fraction'
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = rep(letters[1:3], each=3),
#' n = 1:3,
#' fraction = c(0.8, 0.1, 0.1, 0.6, 0.3, 0.1, 0.7, 0.25, 0.05))
#' plot_MS2Oversampling(data)
#'
plot_MS2Oversampling = function(data)
{
stopifnot(length(unique(data$n)) <= 3) ## at most three -- to match color vector below
#data = d_dups
## reorder factor, such that '10+' is last
data$n = as.character(data$n)
n_unique = sort(unique(data$n)) ## sort as character vector!
data$n = factor(data$n, levels=n_unique[order(nchar(n_unique))], ordered = TRUE)
p = ggplot(data) +
geom_col(position = position_stack(reverse = TRUE), aes_string(x = "fc.raw.file", y = "fraction", fill="n")) +
scale_fill_manual("MS/MS\ncounts", values =c("green", "blue", "red")) +
scale_x_discrete_reverse(data$fc.raw.file) +
xlab("") +
ylab("MS/MS counts per 3D-peak [%]") +
ggtitle(paste0("EVD: Oversampling (MS/MS counts per 3D-peak)")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
coord_flip()
#print(p)
return(p)
}
#'
#' Plot bargraph of oversampled 3D-peaks.
#'
#' Per Raw file, at most three n's must be given, i.e.
#' the fraction of 3D-peaks for n=1, n=2 and n=3(or more).
#' The fractions must sum to 1 (=100%).
#'
#'
#' @param data A data.frame with columns 'file', 'msErr', 'type'
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' n = c(100, 130, 50)
#' data = data.frame(file = repEach(paste(letters[1:3],"\nLTQ [Da]"), n),
#' msErr = c(rnorm(n[1], 0.5), rnorm(n[2], 0.0), rnorm(n[3], -0.5)),
#' type = c("forward", "decoy")[1+(runif(sum(n))>0.95)])
#' plot_MS2Decal(data)
#'
plot_MS2Decal = function(data)
{
## trim down the data to 2-98 percentiles (to avoid outliers far off)
data2 = plyr::ddply(data, "file", function(x) {
qnt = quantile(x$msErr, probs = c(0.02, 0.98), na.rm = TRUE)
return (x[qnt[1] < x$msErr & x$msErr < qnt[2], ])
})
p = ggplot(data2, aes_string(x = "msErr", fill="type")) +
geom_histogram(bins = 30) +
xlab("fragment mass delta") +
ylab("count") +
scale_fill_manual(values = c(forward = "#99d594", decoy = "#ff0000")) +
ggtitle("MSMS: Fragment mass errors per Raw file") +
facet_wrap(~file, scales = "fixed")
#print(p)
return(p)
}
#'
#' Plot bargraph of missed cleavages.
#'
#' Per Raw file, an arbitrary number of missed cleavage classes (one per column) can be given.
#' The total fraction of 3D-peaks must sum to 1 (=100%).
#' Columns are ordered by name.
#'
#' A visual threshold line is drawn at 75% (expected MC0 count).
#'
#' @param data A data.frame with columns 'fc.raw.file', '...' (missed cleavage classes)
#' @param title_sub Plot's subtitle
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = letters[1:5],
#' MC0 = c(0.8, 0.5, 0.85, 0.2, 0.9),
#' MC1 = c(0.1, 0.4, 0.05, 0.7, 0.0),
#' "MS2+" = c(0.1, 0.1, 0.1, 0.1, 0.1),
#' check.names = FALSE)
#' plot_MissedCleavages(data, "contaminant inclusion unknown")
#'
plot_MissedCleavages = function(data, title_sub = "")
{
st_bin.m = reshape2::melt(data, id.vars = c("fc.raw.file"))
p =
ggplot(data = st_bin.m, aes_string(x = "factor(fc.raw.file)", y = "value", fill = "variable")) +
geom_col(position = position_stack(reverse = TRUE)) +
xlab("Raw file") +
ylab("missed cleavages [%]") +
theme(legend.title = element_blank()) +
scale_fill_manual(values = rep(c("#99d594", "#ffffbf", "#fc8d59", "#ff0000", "#800080", "#000000"), 10)) +
geom_abline(alpha = 0.5, intercept = 0.75, slope = 0, colour = "black", linetype = "dashed", size = 1.5) +
coord_flip() +
scale_x_discrete_reverse(st_bin.m$fc.raw.file) +
ggtitle("MSMS: Missed cleavages per Raw file", title_sub)
#print(p)
return(p)
}
#'
#' Plot line graph of TopN over Retention time.
#'
#' Number of Raw files must be 6 at most. Function will stop otherwise.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'rRT', 'topN'
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = rep(letters[1:3], each=100),
#' rRT = seq(20, 120, length.out = 100),
#' topN = c(round(runif(100, min=3, max=5)),
#' round(runif(100, min=5, max=8)),
#' round(runif(100, min=1, max=3)))
#' )
#' plot_TopNoverRT(data)
#'
plot_TopNoverRT = function(data)
{
nrOfRaws = length(unique(data$fc.raw.file))
p = ggplot(data, aes_string(x = "rRT", y = "topN", col = "fc.raw.file")) +
geom_line() +
scale_color_manual(values = brewer.pal.Safe(nrOfRaws, "Set1")) +
xlab("retention time [min]") +
ylab("highest N [median per RT bin]") +
#stat_smooth(method = "loess", formula = y ~ x, se = FALSE, span = 0.1) +
guides(color=guide_legend(title="")) +
ggtitle("MSMSscans: TopN over RT")
#print(p)
return (p)
}
#'
#' Plot line graph of TopN over Retention time.
#'
#' Number of Raw files must be 6 at most. Function will stop otherwise.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'rRT', 'medIIT'
#' @param stats A data.frame with columns 'fc.raw.file', 'mean'
#' @param extra_limit Visual guidance line (maximum acceptable IIT)
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = rep(c("d","a","x"), each=100),
#' rRT = seq(20, 120, length.out = 100),
#' medIIT = c(round(runif(100, min=3, max=5)),
#' round(runif(100, min=5, max=8)),
#' round(runif(100, min=1, max=3)))
#' )
#' stats = data.frame(fc.raw.file = c("d","a","x"),
#' mean = c(4, 6.5, 2))
#' plot_IonInjectionTimeOverRT(data, stats, 10)
#'
plot_IonInjectionTimeOverRT = function(data, stats, extra_limit)
{
data$fc.raw.file = data$fc.raw.file[,drop = TRUE] ## drop unused factor levels
nrOfRaws = length(unique(data$fc.raw.file))
stats_sub = stats[stats$fc.raw.file %in% data$fc.raw.file, , drop = FALSE]
## augment legend with average II-time[ms]
data$fc.raw.file = paste0(data$fc.raw.file, " (~",
round(stats_sub$mean[match(data$fc.raw.file, stats_sub$fc.raw.file)]),
" ms)")
## manually convert to factor to keep old ordering (otherwise ggplot will sort it, since its a string)
data$fc.raw.file = factor(data$fc.raw.file, levels = unique(data$fc.raw.file), ordered = TRUE)
stats_sub$fc.raw.file = paste0(stats_sub$fc.raw.file, " (~",
round(stats_sub$mean[match(stats_sub$fc.raw.file, stats_sub$fc.raw.file)]),
" ms)")
p = ggplot(data) +
geom_line(aes_string(x = "rRT", y = "medIIT", col = "fc.raw.file")) +
scale_color_manual(values = brewer.pal.Safe(nrOfRaws, "Set1")) +
xlab("retention time [min]") +
ylab("ion injection time [ms]") +
geom_hline(yintercept = extra_limit, linetype = 'dashed') +
guides(color=guide_legend(title="Raw file with\naverage inj. time")) +
ggtitle("MSMSscans: Ion Injection Time over RT") +
pointsPutX(x_range = range(data$rRT), x_section = c(0.03, 0.08), y = stats_sub$mean, col = stats_sub$fc.raw.file[,drop = TRUE])
#print(p)
return(p)
}
#'
#' Plot line graph of TopN over Retention time.
#'
#' Number of Raw files must be 6 at most. Function will stop otherwise.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'scan.event.number', 'n'
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = rep(c("d","a","x"), each=10),
#' scan.event.number = 1:10,
#' n = 11:20)
#' plot_TopN(data)
#'
plot_TopN = function(data)
{
p = ggplot(data, aes_string(x = "scan.event.number", y = "n")) +
geom_col() +
xlab("highest scan event") +
ylab("count") +
facet_wrap(~ fc.raw.file, scales = "free_y") +
ggtitle(paste0("MSMSscans: TopN"))
#print(p)
return(p)
}
#'
#' Plot line graph of TopN over Retention time.
#'
#' Number of Raw files must be 6 at most. Function will stop otherwise.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'scan.event.number', 'ratio', 'count'
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#' data = data.frame(fc.raw.file = factor(rep(c("d","a","x"), each=10), levels = c("d","a","x")),
#' scan.event.number = 1:10,
#' ratio = seq(40, 20, length.out=10),
#' count = seq(400, 200, length.out=10))
#' plot_ScanIDRate(data)
#'
plot_ScanIDRate = function(data)
{
p = ggplot(data, aes_string(x = "scan.event.number", y = "ratio", alpha = "count")) +
geom_col() +
xlab("scan event") +
ylab("percent identified") +
facet_wrap(~ fc.raw.file) +
ggtitle(paste0("MSMSscans: TopN % identified over N"))
return (p)
}
#'
#' Plot Total Ion Count over time
#'
#' The input is a data.frame with already averaged counts over binned RT-slices.
#'
#' @param data A data.frame with columns 'fc.raw.file', 'RT', 'intensity'
#' @param x_lim Plot range of x-axis
#' @param y_lim Plot range of y-axis
#' @return GGplot object
#'
#' @import ggplot2
#' @export
#'
#' @examples
#'
#' data = data.frame(fc.raw.file = rep(c("file A", "file B", "file C"), each=81),
#' RT = c(20:100),
#' intensity = c(rnorm(81, mean=20), rnorm(81, mean=10), rnorm(81, mean=30)))
#' plot_TIC(data, c(10, 100), c(0, 40))
#'
plot_TIC = function(data, x_lim, y_lim)
{
p = ggplot(data) +
geom_line(aes_string(x = "RT", y = "intensity", colour = "fc.raw.file"), size=1, alpha=0.7) +
scale_color_manual(values = brewer.pal.Safe(length(unique(data$fc.raw.file)), "Set1")) +
guides(color = guide_legend(title = "Raw file\n(avg. peak width)")) +
xlab("retention time [min]") +
ylab("intensity") +
coord_cartesian(xlim = x_lim, ylim = y_lim) + ## zoom in y -- do not cut data (preserve lines)
ggtitle("SM: Total Ion Count") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
#print(p)
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.