In protViz/SRMService: Report Quantitative Mass Spectrometry Data

# Global knitr chunk defaults for this report: do not echo the source code and
# keep messages and warnings out of the rendered output.
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)

# Fallback progress reporter.
#
# The chunks below call `progress(howmuch, detail)`. If no function of that
# name is already defined, install a no-op that accepts the same two arguments
# and invisibly returns NULL.
#
# `mode = "function"` restricts the lookup to functions: a non-function object
# that happens to be called `progress` must not suppress the fallback, because
# the later `progress(...)` calls would then fail.
if (!exists("progress", mode = "function")) {
  progress <- function(howmuch, detail) {
    invisible(NULL)
  }
}

Input Matrix

Experiment is called: r grp2$projectName

The Numbers

The protein matrix is filtered like this:

Minimum number of peptides / protein: r grp2$nrPeptides
Maximum of missing values per protein : r grp2$maxNA
The number of samples in this experiment is: r ncol(grp2$proteinIntensity)
The number of proteins in this experiment is: r nrow(grp2$proteinIntensity)
Total number of proteins without decoy sequences is: r nrow(grp2$proteinIntensity) - sum(grepl("REV__",grp2$proteinAnnotation$ProteinName))
Percentage of contaminants : r round(mean(grepl("CON__",grp2$proteinAnnotation$ProteinName)) * 100, digits=1) %
Percentage of false positives : r round(mean(grepl("REV__",grp2$proteinAnnotation$ProteinName)) * 100, digits=1) %

library(knitr)
library(limma)

\pagebreak

Proteins Used for Quantitation

# Reorder the rows of the protein intensity table for display.
# Sort keys (both in decreasing order): the per-protein counts returned by
# getNrNAs(), then the negated row sums of the intensities (NAs ignored).
missing <- grp2$getNrNAs()
# rowSums() is the vectorized equivalent of apply(x, 1, sum, na.rm = TRUE).
int <- rowSums(grp2$proteinIntensity, na.rm = TRUE)
# TRUE rather than T: `T` is an ordinary variable and can be reassigned.
row_order <- order(missing, -int, decreasing = TRUE)
grp2$proteinIntensity <- grp2$proteinIntensity[row_order, ]

The input matrix has the following structure (Figure \@ref(fig:overview)).

library(quantable)

# Image of the log2-transformed protein intensities (transposed for display),
# drawn without column labels and with a 21-step blue colour scale.
log2_intensities <- t(log2(grp2$proteinIntensity))
imageWithLabels(
  log2_intensities,
  col.labels = NULL,
  col = quantable::getBlueScale(21)
)

# Cumulative percentage of proteins by number of missing values per protein.
cumulative_pct <- cumsum(table(missing)) / length(missing) * 100
barplot(
  cumulative_pct,
  ylab = "% of proteins",
  xlab = "# of missing values per protein"
)
progress(0.1, "Summary")

# Percentage of missing values per column of the intensity table.
# The bottom margin is enlarged for the rotated (las = 2) bar labels and the
# previous margins are put back afterwards.
mar <- par("mar")
par(mar = c(13, 3, 3, 3))
missing_pct <- quantable::colNAs(grp2$proteinIntensity) /
  nrow(grp2$proteinIntensity) * 100
barplot(missing_pct, las = 2, ylab = "percent missing", cex.names = 0.7)
par(mar = mar)

\pagebreak

Distribution of Intensities

Figure \@ref(fig:distributionRaw) shows the un-normalized values, while Figure \@ref(fig:normalized) shows the z-transformed values (median subtracted, then divided by the variance).

library(ggplot2)
library(reshape2)
# Violin plot of the log2 intensities, one violin per level of `variable` in
# the melted (long-format) table, with the median marked as a point.
longm <- melt(log2(grp2$proteinIntensity))

# ggplot() + geom_violin() replaces the deprecated qplot(); `fun` replaces the
# deprecated `fun.y` argument of stat_summary() (needs ggplot2 >= 3.3.0).
p <- ggplot(longm, aes(x = variable, y = value)) +
  geom_violin() +
  labs(x = "", y = "log2(I)")
p +
  stat_summary(fun = median, geom = "point") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Figure \@ref(fig:scaling) shows the median and standard deviations of the log2 transformed intensities. Large differences in these values are critical. These values are used to scale the samples.

# Bar plot of the `medians` element returned by getNormalized(), shifted by
# their mean, with a secondary axis on the linear (2^|x|) scale.
bb <- grp2$getNormalized()$medians

# Enlarge the margins for the rotated bar labels and the right-hand axis; the
# previous settings are restored once the plot is complete.
old_par <- par(mar = c(15, 6, 3, 6))
# NOTE(review): abs() is applied to the values before sorting, while the mean
# that is subtracted is taken over the signed values -- confirm this is
# intended.
barplot(
  sort(abs(bb)) - mean(bb),
  horiz = FALSE, # FALSE rather than F: `F` can be reassigned
  las = 2,
  main = "median",
  cex.names = 0.6,
  ylab = "log2(sample average) - log2(total average)",
  ylim = c(-log2(8), log2(8))
)
# Red reference lines at +/- log2(5).
abline(h = c(-log2(5), log2(5)), col = 2)
x <- seq(-3, 3, by = 1)
# Right-hand axis: the same tick positions labelled with 2^|x|.
axis(4, x, round(2^abs(x), digits = 1))
mtext("linear scale", side = 4, line = 2)
par(old_par)

progress(0.2, "Normalization")

The effect of the normalization (z transformation) is visualized in Figure \@ref(fig:normalized). It shows the z transformed log2 intensities.

# Violin plot of the `data` element returned by getNormalized(), one violin
# per level of `variable` in the melted table, with the median as a point.
longm <- melt(grp2$getNormalized()$data)
# ggplot() + geom_violin() replaces the deprecated qplot(); `fun` replaces the
# deprecated `fun.y` argument of stat_summary() (needs ggplot2 >= 3.3.0).
p <- ggplot(longm, aes(x = variable, y = value)) +
  geom_violin() +
  labs(x = "", y = "z-score")
p +
  stat_summary(fun = median, geom = "point") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

\pagebreak

# Pairs plot of the normalized data, drawn only when the table has at most
# ten rows.
# NOTE(review): the guard counts rows; if the intent is to limit the number of
# plotted columns, ncol() would be the matching check -- confirm.
small_enough <- nrow(grp2$getNormalized()$data) <= 10
if (small_enough) {
  quantable::mypairs(grp2$getNormalized()$data)
}

\pagebreak

Coefficients of Variation

# CV values computed by quantable::CV() on the protein intensities, shown as a
# violin plot with the median marked and summarised in a table.
# NOTE: `all` shadows base::all() as a variable name; it is kept unchanged in
# case other chunks rely on it.
all <- quantable::CV(grp2$proteinIntensity)
# A single data.frame needs no rbind().
CVs <- data.frame(condition = "all", cv = all)

# ggplot() + geom_violin() replaces the deprecated qplot(); `fun` replaces the
# deprecated `fun.y` argument of stat_summary() (needs ggplot2 >= 3.3.0).
p <- ggplot(CVs, aes(x = condition, y = cv)) +
  geom_violin() +
  labs(x = "", y = "Coefficient of Variation (%)")
p +
  stat_summary(fun = median, geom = "point") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Median CV per condition.
cvSummary <- aggregate(cv ~ condition, data = CVs, FUN = median, na.rm = TRUE)
knitr::kable(cvSummary, caption = "median of cv")

\pagebreak

# Row-wise standard deviation (NAs ignored) of the normalized data, shown as a
# violin plot with the median marked and summarised in a table.
# NOTE: `all` shadows base::all() as a variable name; it is kept unchanged in
# case other chunks rely on it.
all <- apply(grp2$getNormalized()$data, 1, sd, na.rm = TRUE)
# A single data.frame needs no rbind().
SDs <- data.frame(condition = "all", sd = all)

# ggplot() + geom_violin() replaces the deprecated qplot(); `fun` replaces the
# deprecated `fun.y` argument of stat_summary() (needs ggplot2 >= 3.3.0).
p <- ggplot(SDs, aes(x = condition, y = sd)) +
  geom_violin() +
  labs(x = "", y = "sd of z-score")
p +
  stat_summary(fun = median, geom = "point") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Median standard deviation per condition.
sdSummary <- aggregate(sd ~ condition, data = SDs, FUN = median, na.rm = TRUE)
knitr::kable(sdSummary, caption = "median of sd")

progress(0.1, "CVs")

\pagebreak

Heatmaps and Clustering for Samples and Proteins

# Heat map of the squared Spearman correlation between the columns of the
# normalized data, computed on pairwise complete observations.
normalized_cor <- cor(
  grp2$getNormalized()$data,
  use = "pairwise.complete.obs",
  method = "spearman"
)
simpleheatmap(
  normalized_cor^2,
  palette = getGreensScale(21),
  margins = c(10, 3)
)

Figure \@ref(fig:heatmapData) and Figure \@ref(fig:correlation) show how the samples cluster based on their correlation and on the protein expression profiles.

# Heat map of the row-centered normalized data, restricted to rows whose
# getNrNAs() count is below half the number of columns.
# The normalized table is fetched once and reused (it was previously assigned
# to `tmp` but then re-fetched twice); this assumes getNormalized() returns
# the same data on every call.
tmp <- grp2$getNormalized()$data

keep_rows <- grp2$getNrNAs() < ncol(tmp) / 2
# drop = FALSE keeps the two-dimensional shape even if only one row passes.
plotMat <- tmp[keep_rows, , drop = FALSE]
# Center each row (subtract the row mean) without rescaling.
plotMat <- t(scale(t(plotMat), scale = FALSE))
simpleheatmap(
  plotMat,
  margins = c(10, 3),
  breaks = seq(-2.5, 2.5, length.out = 26),
  palette = getBlueWhiteRed(25)
)

progress(0.2, "Heatmaps")

References

This report was generated using the packages SRMService and quantable. The q-values and p-values were computed using the Bioconductor packages limma and qvalue.

For questions and improvements please do contact the authors of the package SRMService.

Disclaimer and Acknowledgements

This document was generated using Rmarkdown and processes text files which are generated with a label-free quantitation software such as MaxQuant or Progenesis. The obtained results should be validated orthogonally as well (e.g. with Western blots). The Functional Genomics Center Zurich does not provide any kind of guarantee of the validity of these results.

\newpage