htmltools::img(src = "https://raw.githubusercontent.com/ningzhibin/rmdocpu/master/inst/rmd/iMetaReport.png", alt = 'logo', style = 'position:absolute; top:0; right:0; padding:10px;width:100px;height:100px;')
# enviroment setup knitr::opts_chunk$set(echo = FALSE,warning = FALSE, message = FALSE, cache = FALSE) library(tidyverse) library(ggplot2) library(plotly) # other reqruied package: # DT # requred, but not need to library # Version update! # 20200206 add fully support for pdf and word format, see how to use down below. # fix the problem for displaying ggplotly on html in a row by adding a break line. Otherwise it will only show the last one # This is because of the tag thing in html # add a dynamic control of the figure height, when samples too many, increase the figure height, otherwise it is not recoganizable on static report, like pdf and word # add a parameterized control of rendering to html or static format, for html one, use ggplotly, for static one, use ggplot2 # fix some bugs, imcompatible with ggplotly # fix some bugs, to make it the run more efficiently # add a function to order the rawfiles according to the user input order, if grouping information provided, order according to the groups first # adda function to report the group information, provided or not, grouping information if provided, error message if not provided but not matched with the raw files, or with missing values, which can help users to double check the meta table setting # learned A lot of new stuff for rmarkdown. like how to use r variables/values in r section settings(like the figure_height in this document), or directly used out of r blocks, like the meta inform in this documents. #
# local usage test and debug simple file df_summary.txt <- read.delim("summary.txt", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) rmarkdown::render("MQ_report_summary_indev.Rmd", params = list(summary_file_tbl = df_summary.txt)) rmarkdown::render("MQ_report_summary_indev.Rmd",output_format = "pdf_document", params = list(summary_file_tbl = df_summary.txt,if_html =FALSE)) rmarkdown::render("MQ_report_summary_indev.Rmd",output_format = "word_document", params = list(summary_file_tbl = df_summary.txt,if_html =FALSE)) # local usage test and debug: big file df_summary.txt <- read.delim("22122017_Colon_Aging_summary.txt", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) rmarkdown::render("MQ_report_summary_indev.Rmd", params = list(summary_file_tbl = df_summary.txt)) rmarkdown::render("MQ_report_summary_indev.Rmd",output_format = "pdf_document", params = list(summary_file_tbl = df_summary.txt,if_html =FALSE)) rmarkdown::render("MQ_report_summary_indev.Rmd",output_format = "word_document", params = list(summary_file_tbl = df_summary.txt,if_html =FALSE)) # local usage test and debug, big file without meta df_summary.txt <- read.delim("22122017_Colon_Aging_summary.txt", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) rmarkdown::render("MQ_report_summary_indev.Rmd", params = list(summary_file_tbl = df_summary.txt)) rmarkdown::render("MQ_report_summary_indev.Rmd",output_format = "pdf_document", params = list(summary_file_tbl = df_summary.txt,if_html =FALSE)) rmarkdown::render("MQ_report_summary_indev.Rmd",output_format = "word_document", params = list(summary_file_tbl = df_summary.txt,if_html =FALSE)) # local usage test and debug, big file with meta df_summary.txt <- read.delim("22122017_Colon_Aging_summary.txt", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) meta_table <- read.delim("meta.txt", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) rmarkdown::render("MQ_report_summary_indev.Rmd", params = list(summary_file_tbl = df_summary.txt, meta_table = meta_table)) rmarkdown::render("MQ_report_summary_indev.Rmd",output_format = "pdf_document", params = list(summary_file_tbl = df_summary.txt, meta_table = meta_table,if_html =FALSE)) rmarkdown::render("MQ_report_summary_indev.Rmd",output_format = "word_document", params = list(summary_file_tbl = df_summary.txt, meta_table = meta_table,if_html =FALSE))
tidy_MQ_summary <- function(df_summary.txt){ # the format of summary.txt mainly fall into three categories # df_summary.txt has to be a tidyverse tbl format, from read_tsv # take out the last line to organize into a data.frame last_line <- t(df_summary.txt[nrow(df_summary.txt),]) last_line[last_line[,1] == ""] <- NA last_line[last_line[,1] == "0"] <- NA last_line <- last_line[-1,, drop = FALSE] last_line <- last_line[which(!is.na(last_line[,1])),, drop = FALSE] colnames(last_line) <- ("values") summary <- df_summary.txt[-nrow(df_summary.txt),] # remove the last line # take out rows about raw files summary # if there are experiment design column, if(length(grep("Experiment", colnames(summary)))>0){ # and if there are separate rows for experimental desgin, # otherwise, expereiment desgin is set with one raw file one experiment if(length(which(nchar(summary$Experiment) ==0)) >0){ # this df_rawfiles <-summary[which(nchar(summary$Experiment) >0),] # take out rows about experiment summary df_experiment <- summary[which(nchar(summary$Experiment) ==0),] return(list("summary_all" = last_line, "summary_rawfiles" = df_rawfiles, "summary_experiment" = df_experiment, "set_experiment" = TRUE )) }else{ df_rawfiles <- summary return(list("summary_all" = last_line, "summary_rawfiles" = df_rawfiles, "set_experiment" = FALSE # even with experiment setup, still no need to do )) } # otherwise no need to do separate experiment display }else{ df_rawfiles <- summary return(list("summary_all" = last_line, "summary_rawfiles" = df_rawfiles, "set_experiment" = FALSE )) } } MQ_QC_plot<- function(data.frame, plot_type = c("scatter", "bar", "density", "histogram", "freqpoly", "box", "violin") , group = NULL, # needs to be column name cutoff = 20, maintitle = "", xlabel = "", vertical = FALSE, ... ){ # in case some column names are not valid names (containing special symbol, like space, % etc) names(data.frame)[1] <- "names" names(data.frame)[2] <- "value" if(length(plot_type) == 0){ # if no plot type defined, do not plot and exit stop }else{ plot_out <- list() } if(is.null(group)){ # for none grouped # this will enrsure the default plottinng order is followin the input order of the summary.txt, data.frame$names <- factor(data.frame$names, levels = data.frame$names) #for violinplot and boxplot, a fake group is needed group <- "All" data.frame$All = "All" #plotting if("scatter" %in% plot_type){ scatter_plot <- ggplot(data.frame) + geom_point(aes_string(x = "names", y = "value")) + geom_hline(yintercept = cutoff, linetype="dashed", color = "blue", size=1) + labs(title = maintitle, x = "", y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) + coord_flip() plot_out <- c(plot_out, list("scatter_plot" = scatter_plot)) } if("bar" %in% plot_type){ bar_plot <- ggplot(data.frame) + geom_hline(yintercept = cutoff, linetype="dashed", color = "blue", size=1) + geom_col(aes_string(x = "names", y = "value"))+ labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank())+ coord_flip() plot_out <- c(plot_out, list("bar_plot" = bar_plot)) } if("freqpoly" %in% plot_type){ # distritibution freqpoly_plot <- ggplot(data.frame) + annotate("rect", xmin=-Inf, xmax= cutoff, ymin=0, ymax=Inf, alpha=0.1, fill="red") + annotate("rect", xmin=cutoff, xmax=Inf, ymin=0, ymax=Inf, alpha=0.5, fill="lightblue") + geom_freqpoly(aes_string("value") )+ geom_vline(xintercept = cutoff, linetype="dashed", color = "blue", size=1)+ labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) if(vertical){ freqpoly_plot <- freqpoly_plot + coord_flip() } plot_out <- c(plot_out, list("freqpoly_plot" = freqpoly_plot)) } if("histogram" %in% plot_type){ histogram_plot <- ggplot(data.frame) + annotate("rect", xmin=-Inf, xmax= cutoff, ymin=0, ymax=Inf, alpha=0.1, fill="red") + annotate("rect", xmin=cutoff, xmax=Inf, ymin=0, ymax=Inf, alpha=0.5, fill="lightblue") + geom_histogram(aes_string("value"),position = "identity",alpha = 0.5)+ geom_vline(xintercept = cutoff, linetype="dashed", color = "blue", size=1)+ labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) if(vertical){ histogram_plot <- histogram_plot + coord_flip() } plot_out <- c(plot_out, list("histogram_plot" = histogram_plot)) } if("density" %in% plot_type){ density_plot <-ggplot(data.frame) + annotate("rect", xmin=-Inf, xmax= cutoff, ymin=0, ymax=Inf, alpha=0.1, fill="red") + annotate("rect", xmin=cutoff, xmax=Inf, ymin=0, ymax=Inf, alpha=0.5, fill="lightblue") + geom_density(aes_string("value"),position = "identity",alpha = 0.5) + geom_vline(xintercept = cutoff, linetype="dashed", color = "blue", size=1)+ labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) if(vertical){ density_plot <- density_plot + coord_flip() } plot_out <- c(plot_out, list("density_plot" = density_plot)) } }else{ # if grouping provided, order by group data.frame$names <- factor(data.frame$names, levels = data.frame$names[order(data.frame[group])]) if("scatter" %in% plot_type){ scatter_plot <- ggplot(data.frame) + geom_point(aes_string(x = "names", y = "value", colour = group)) + geom_hline(yintercept = cutoff, linetype="dashed", color = "blue", size=1) + labs(title = maintitle, x = "", y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank())+ coord_flip() plot_out <- c(plot_out, list("scatter_plot" = scatter_plot)) } if("bar" %in% plot_type){ bar_plot <- ggplot(data.frame) + geom_hline(yintercept = cutoff, linetype="dashed", color = "blue", size=1) + geom_col(aes_string(x = "names", y = "value", fill = group))+ labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank())+ coord_flip() plot_out <- c(plot_out, list("bar_plot" = bar_plot)) } if("freqpoly" %in% plot_type){ # distritibution freqpoly_plot <- ggplot(data.frame) + annotate("rect", xmin=-Inf, xmax= cutoff, ymin=0, ymax=Inf, alpha=0.1, fill="red") + annotate("rect", xmin=cutoff, xmax=Inf, ymin=0, ymax=Inf, alpha=0.5, fill="lightblue") + geom_freqpoly(aes_string("value",colour = group) )+ geom_vline(xintercept = cutoff, linetype="dashed", color = "blue", size=1)+ labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) if(vertical){ freqpoly_plot <- freqpoly_plot + coord_flip() } plot_out <- c(plot_out, list("freqpoly_plot" = freqpoly_plot)) } if("histogram" %in% plot_type){ histogram_plot <- ggplot(data.frame) + annotate("rect", xmin=-Inf, xmax= cutoff, ymin=0, ymax=Inf, alpha=0.1, fill="red") + annotate("rect", xmin=cutoff, xmax=Inf, ymin=0, ymax=Inf, alpha=0.5, fill="lightblue") + geom_histogram(aes_string("value", colour = group, fill = group),position = "identity",alpha = 0.5)+ geom_vline(xintercept = cutoff, linetype="dashed", color = "blue", size=1)+ labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) if(vertical){ histogram_plot <- histogram_plot + coord_flip() } plot_out <- c(plot_out, list("histogram_plot" = histogram_plot)) } if("density" %in% plot_type){ density_plot <-ggplot(data.frame) + annotate("rect", xmin=-Inf, xmax= cutoff, ymin=0, ymax=Inf, alpha=0.1, fill="red") + annotate("rect", xmin=cutoff, xmax=Inf, ymin=0, ymax=Inf, alpha=0.5, fill="lightblue") + geom_density(aes_string("value", colour = group, fill = group),position = "identity",alpha = 0.5) + geom_vline(xintercept = cutoff, linetype="dashed", color = "blue", size=1)+ labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) if(vertical){ density_plot <- density_plot + coord_flip() } plot_out <- c(plot_out, list("density_plot" = density_plot)) } } # the following 2 plots have to use grouping information for plotting, even with only 1 group if("violin" %in% plot_type){ violin_plot <- ggplot(data.frame) + geom_violin(aes_string(x =group, y = "value", colour = group, fill = group))+ geom_jitter(aes_string(x =group, y = "value",colour = group, fill = group),shape=21) + geom_hline(yintercept = cutoff, linetype="dashed", color = "blue", size=1) + labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) if(vertical){ violin_plot <- violin_plot + coord_flip() } plot_out <- c(plot_out, list("violin_plot" = violin_plot)) } if("box" %in% plot_type){ box_plot <- ggplot(data.frame) + geom_boxplot(aes_string(x =group, y = "value", colour = group, fill = group))+ geom_jitter(aes_string(x =group, y = "value",colour = group, fill = group),shape=21) + geom_hline(yintercept = cutoff, linetype="dashed", color = "blue", size=1) + labs(title = maintitle, x = "",y = xlabel) + theme_bw() + theme(plot.title = element_text(hjust = 0.5), axis.text.x = element_text(angle = 90, hjust = 1), panel.grid = element_blank()) if(vertical){ box_plot <- box_plot + coord_flip() } plot_out <- c(plot_out, list("box_plot" = box_plot)) } return(plot_out) # Example # my_test <- data.frame("samplename" = paste0("sample_", 1:20), # "msms_id" = c(abs(rnorm(10))*10+20, abs(rnorm(10))*10+30), # "treat_group" = c(paste0("group_", rep("A",10)), paste0("group_", rep("B",10))) # ) # # tt <- MQ_QC_plot(my_test, plot_type = c("scatter","bar","density", "histogram", "freqpoly", "box", "violin"), cutoff = 35, group = "treat_group", maintitle = "MSMS ID Rate", xlabel = "MS/MS ID %") # now tt has all the required plot } # this function is borrowed from: http://michaeljw.com/blog/post/subchunkify/ # subchunkify <- function(g, fig_height=7, fig_width=5) { # g is a ggplot object # g_deparsed <- paste0(deparse( # function() {g} # ), collapse = '') # # sub_chunk <- paste0(" # `","``{r sub_chunk_", floor(runif(1) * 10000), ", fig.height=", # fig_height, ", fig.width=", fig_width, ", echo=FALSE}", # "\n(", # g_deparsed # , ")()", # "\n`","`` # ") # # cat(knitr::knit(text = knitr::knit_expand(text = sub_chunk), quiet = TRUE)) # }
This report provides some overall description of the database search. Users can use this to quickly check the overal quality of the experiment
# input if(is.null(params$summary_file_tbl)){ # test with local files df_summary.txt <- read.delim("summary.txt", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) }else{ # opencpu render from data table by parametrized input df_summary.txt <- params$summary_file_tbl } # Note: some following analysis with meta infor assumes that # 1st columns as sample name, 2nd column as experiment name, 3rd column and after as grouping meta_table <- params$meta_table
# process the file summary_file_summary <- tidy_MQ_summary(df_summary.txt) # get the raw stat line for output df_all <- as.data.frame(t(summary_file_summary$summary_all)) # get the raw file information dataframe raw <-summary_file_summary$summary_rawfiles data.frame <- data.frame(rawfile= raw[["Raw file"]], msms_id_rate = raw[["MS/MS Identified [%]"]]) if( is.null(meta_table)){ meta_info <- "* **No meta information provided**" }else if (any(is.na(meta_table))){ meta_info <- "* **The meta information provided has missing values, please check again**" }else if(!(all(as.vector(data.frame$rawfile) %in% meta_table[,1]) && all( meta_table[,1] %in% as.vector(data.frame$rawfile)))){ meta_info <- "* **The raw files in meta information provided do not match the raw file names in the summary.txt, please check again**" }else{ meta_info <- c("* **Groups: **",unique(meta_table[,3])) }
Peptide Sequences Identified in total: r df_all["Peptide Sequences Identified"]
Avearge ms/ms identification rate(%): r df_all["MS/MS Identified [%]"]
r meta_info
Why you should pay attention to MSMS Id rate?
MS ID rate is a good repretation of the MS run quality. Raw files from Q-Exactive series should have roughly around 50% ms ID rate (Percentage of MSMS spectra identified as peptided, at a 1% FDR) for humane cell culture digest, and at least 20% for metaproteomics samples according to experience.
MS ID rate should also be well-reproduced across samples and groupings.
Check the raw files if they have obnormally low ID rate, usually with abnormal LC/basepeak profile or low MS intensity.
# determin the figure height. if some files have too many samples, it would be a good idea to increase the height to make it easier to figurue_height <- 0.1*nrow(data.frame)+2 if( is.null(meta_table)){ # plot without meta plots_msms <- MQ_QC_plot(data.frame, plot_type = c("scatter","density", "box"), cutoff = 20,maintitle = "MSMS ID Rate", xlabel = "MS/MS ID %") }else{ # put the grouping information into the data.frame data.frame_merged_msms <- merge(data.frame, meta_table, by.x= colnames(data.frame)[1], by.y = colnames(meta_table)[1]) # the plot, with meta, by the first column of grouping information, which is column 4 (column 3 as experiment) # here, can add suport for more grouping plots_msms <- MQ_QC_plot(data.frame_merged_msms, plot_type = c("scatter","density","box"), group = colnames(data.frame_merged_msms)[4],cutoff = 20,maintitle = "MSMS ID Rate", xlabel = "MS/MS ID %") }
if(params$if_html){ plotly::ggplotly(plots_msms$scatter_plot) }else{ plots_msms$scatter_plot }
if(params$if_html){ plotly::ggplotly(plots_msms$density_plot) }else{ plots_msms$density_plot }
if(params$if_html){ plotly::ggplotly(plots_msms$box_plot) }else{ plots_msms$box_plot }
data.frame_peptide <- data.frame(rawfile= raw[["Raw file"]], Peptide_sequence_identified = raw[["Peptide Sequences Identified"]]) if(is.null(meta_table)){ # plot without meta plots_peptide <- MQ_QC_plot(data.frame_peptide, plot_type = c("scatter","density", "box"), cutoff = 3000,maintitle = "Peptide Sequences Identified", xlabel = "Peptide Sequences Identified") }else{ # put the grouping information into the data.frame data.frame_merged_peptide <- merge(data.frame_peptide, meta_table, by.x= colnames(data.frame_peptide)[1], by.y = colnames(meta_table)[1]) # the plot, with meta, by the first column of grouping information, which is column 4 (column 3 as experiment) # here, can add suport for plots_peptide <- MQ_QC_plot(data.frame_merged_peptide, plot_type = c("scatter","density", "box"), group = colnames(data.frame_merged_peptide)[4],cutoff = 0,maintitle = "Peptide Sequences Identified", xlabel = "Peptide Sequences Identified") }
if(params$if_html){ plotly::ggplotly(plots_peptide$scatter_plot) }else{ plots_peptide$scatter_plot }
if(params$if_html){ plotly::ggplotly(plots_peptide$density_plot) }else{ plots_peptide$density_plot }
if(params$if_html){ plotly::ggplotly(plots_peptide$box_plot) }else{ plots_peptide$box_plot }
check the overall performance for all raw files:
if(params$if_html){ nrows <- nrow(summary_file_summary$summary_all) DT::datatable(summary_file_summary$summary_all,options = list(pageLength = nrows)) # datatable is only good for html }else{ knitr::kable(summary_file_summary$summary_all) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.