In protViz/SRMService: Report Quantitative Mass Spectrometry Data

knitr::opts_chunk$set(fig.width=8, fig.height=8, fig.path='Figs/',
                      echo=FALSE, warning=FALSE, message=FALSE)

barplot(table(protData$getWideFormat()$Condition))

Number of samples per group.

library(lattice)
library(reshape2)
library(RColorBrewer)
library(quantable)
library(ggplot2)
library(reshape2)
library(glmnet)
library(lattice)
library(plyr)


bwplot(Intensity ~ Condition | Protein, data=protData$data,scales=list(x=list(rot=90),relation="free") )

Distribution of protein fold changes per group.

colors <- rep("", nrow(protData$getWideFormat()))

Ucond <- unique(protData$getWideFormat()$Condition)
mypalette <- brewer.pal(length(Ucond),"Dark2")

for(i in 1:length(Ucond)){
  colors[protData$getWideFormat()$Condition==Ucond[i]]<-mypalette[i]
}

pch <- colors

for(i in 1:length(Ucond)){
  pch[protData$getWideFormat()$Condition==Ucond[i]]<-i
}

pch <- as.numeric(pch)
plot(1,type="n", axes = FALSE,xlab="", ylab="")
legend(1,1, legend=Ucond,cex=1,pch=1:length(Ucond),lwd=4,lty=1,col=mypalette, xjust=0.5, yjust=0.5)

The variable selection algorithm does not work with missing values therefore we replace them with the median fold change of a protein. A possible improvement would be to replace them with the median protein intensity of protein within a group.

Overview Heatmap

simpleheatmap( protData$getIntensities() , palette = getBlueWhiteRed(21),ylab="Ig", main="",RowSideColors=colors)

Overview Heatmap Scaled

simpleheatmap( scale(protData$getIntensities()) , palette = getBlueWhiteRed(21), main="",RowSideColors=colors)

How to do the variables correlate

simpleheatmap( cor((protData$getIntensities()), use="pairwise.complete.obs", method="spearman")^2 , palette = getGreensScale(21),xlab="Ig",ylab="Ig", main="R^2")

How do the bio replicates correlate

mat <- (cor(t(protData$getIntensities()),use="pairwise.complete.obs", method="spearman"))^2

simpleheatmap(mat,RowSideColors=colors, xlab="patient", ylab="patient", main="R^2",palette = getGreensScale(21))

Violin Plot

Bioreplicate

p <- qplot( Run , Intensity , data=protData$data , geom="violin" , xlab="" , ylab="log10(I)")
p + stat_summary(fun.y=median,geom='point') +theme(axis.text.x = element_text(angle = 90, hjust = 1))

Variables

p <- qplot( Protein , Intensity , data=protData$data , geom="violin" , xlab="" , ylab="log10(I)")
p + stat_summary(fun.y=median,geom='point') +theme(axis.text.x = element_text(angle = 90, hjust = 1))

PCA

#datalog2 <- scale(datalog2)
sum(is.na(unlist(protData$getIntensitiesNoMissing())))
#colnames(datalog2)<-paste("p", colnames(datalog2), sep=".")

fmla <- as.formula(paste(" ~ ", paste(colnames(protData$getIntensitiesNoMissing()), collapse= "+")))

ir.pca <- prcomp( fmla, data=data.frame(protData$getIntensitiesNoMissing()), center = TRUE, scale. = TRUE, na.action=na.omit)
par(mfrow=c(3,1))

plot(ir.pca$x[,1:2],col=colors,pch=pch)
legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond))

plot(ir.pca$x[,2:3],col=colors,pch=pch)
legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond))

plot(ir.pca$x[,3:4],col=colors,pch=pch)
legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond))

Variable Selection

disease <- as.factor(protData$getWideFormat()$Condition)
mylambda <- 1/(1:25)^1.7
datalog2 <- as.matrix(protData$getIntensitiesNoMissing())

glmmod <- glmnet(datalog2,y=disease,family='multinomial', lambda =mylambda,maxit= 1000000)

cv.glmmod <- cv.glmnet(datalog2, y = disease, alpha = 1, family='multinomial', nfolds=20, lambda=mylambda )
plot(cv.glmmod)
best_lambda <- cv.glmmod$lambda.min
abline(v=log(best_lambda))
abline(v=log(best_lambda),col=2)

i<-1
for(i in 1:length(glmmod$beta)){
  imageWithLabels(as.matrix(t(glmmod$beta[[i]])) , row.labels = round(mylambda,digits=3),
                  col=getBlueWhiteRed(151),zlim=c(-2,2),main=names(glmmod$beta)[i],ylab="Ig", xlab="L1")

  abline(v=(which(best_lambda==mylambda))/(length(mylambda)))
}

co <- coef(glmmod,s=best_lambda)

resCoeff <-list()
for(i in 1:length(co)){
  dat <- as.matrix(co[[i]])
  resCoeff[[length(resCoeff)+1]] <- data.frame(Condition=names(co)[i], features = rownames(dat) , Coefficients = dat[,1])
}
resCoeff <- do.call("rbind",resCoeff)

barchart(Coefficients ~ features | Condition , data=resCoeff ,origin=0,scales=list(x=list(rot=90)))

predictors <- list()

for( i in 1:length(co) ) {
  yvals <- (co[[i]][,1])
  prednames <- names(yvals)
  predictors[[names(co)[i]]]<- data.frame(names = prednames, values= yvals)
}

bound <- do.call("rbind", predictors)


bound <- cbind(gsub("\\..*$","",rownames(bound)), bound )
colnames(bound)[1]<-"Condition"


bound <- bound[abs(bound$values) > 0,]
head(bound)

\newpage

Values of coefficient

coefficientTable <- dcast(names ~ Condition, data=bound, value.var = "values")
knitr::kable(coefficientTable )

Values of coefficients in the model.

rownames(coefficientTable) <- coefficientTable[,1]
coefficientTable <- coefficientTable[,2:ncol(coefficientTable)]
imageWithLabels(t(coefficientTable),col = getBlueWhiteRed(51),zlim=c(-8,8))

Same table shown with coefficients color coded for better readability.

Visualize distribution of relevant features

rnam <- rownames(coefficientTable)[2:nrow(coefficientTable)]
variables <- protData$getWideFormat()[, rnam]

variables<-cbind(protData$getWideFormat()[,1:2],variables)
xx <- melt(variables)

bwplot(value ~ Condition | variable, data=xx,scales=list(x=list(rot=90),relation="free") )

Predict the outcomes

classPred <- predict(glmmod, datalog2, s=best_lambda, type="class")



check.model.accuracy <- function(predicted.class, actual.class){
  result.tbl <- as.data.frame(table(predicted.class,actual.class ) ) 
  colnames(result.tbl)[1:2] <- c("Pred","Act")

  F.score.row <- vector(length(unique(result.tbl$Pred)), mode = "list")

  for (cntr in 1:length(unique(result.tbl$Pred)) ){
    pred.class <- unique(result.tbl$Pred)[cntr]
    tp <- sum(result.tbl[result.tbl$Pred==pred.class & result.tbl$Act==pred.class, "Freq"])
    tp.fp <- sum(result.tbl[result.tbl$Pred == pred.class , "Freq" ])
    tp.fn <- sum(result.tbl[result.tbl$Act == pred.class , "Freq" ])
    precision <- tp/tp.fp 
    recovery <- tp/tp.fn
    F.score <- 2*precision*recovery/(precision+recovery)

    res <- data.frame("pred.class" = as.character(pred.class),
                      "precision" = precision,
                      "recovery" = recovery,
                      "F.score" = F.score , stringsAsFactors = FALSE)
    F.score.row[[cntr]] <- res
  }

  return(plyr::rbind.fill(F.score.row))
}

res <- check.model.accuracy(classPred,as.character(disease))

knitr::kable(res, digits=rep(2,ncol(res)))

first part

tab <-cbind(file = rownames(protData$getIntensitiesNoMissing()), truth=as.character(disease) , prediction = classPred)
knitr::kable(tab[1:min(33, nrow(tab)),])

second part

if(nrow(tab) > 34){
  knitr::kable(tab[34:nrow(tab),])
}

xx<-table(disease, classPred)
knitr::kable(xx)

Table columns : predicted category, Table rows : real category.

wrongPred <- round(sum(disease != classPred)/length(disease) * 100)
pred <- rbind(c("Wrong predictions :",wrongPred ),
              c("Correct predictions :", 100 - wrongPred ))

colnames(pred) <- c("", "%")
knitr::kable(pred)

Two group analysis

Adjusted p-values

tdatalog2<-t( protData$getIntensities() )
Ucond <- as.character(Ucond)

nrpics<-sum(1:(length(Ucond)-1))
par(mfrow=c(ceiling(nrpics/2),2))
res<-list()
significant <- list()
plots <- list()
w <- 1

for(i in 1:length(Ucond)){
  if(i < length(Ucond)){

    for(j in (i+1):length(Ucond)){
      cat(" i:", Ucond[i], " j:", Ucond[j], "\n")
      iidx <- grep(Ucond[i], protData$getWideFormat()$Condition)
      jidx <- grep(Ucond[j], protData$getWideFormat()$Condition)
      tvals <- getTValuesForVolcano(tdatalog2[,iidx], tdatalog2[,jidx] )
      res[[length(res)+1]] <- data.frame(comparison = paste(Ucond[i],"-", Ucond[j],sep=""),
                                         label = rownames(tdatalog2),
                                         pvals=tvals$pval ,
                                         group1Mean = apply(tdatalog2[,iidx],1, mean, na.rm=TRUE),
                                         group2Mean = apply(tdatalog2[,jidx],1, mean, na.rm=TRUE),

                                         foldchange=tvals$fchange,
                                         pvals.adj = tvals$pvaladj )
      tmp <- data.frame(names = rownames(tdatalog2), tvals)
      #tmp <- quantable::filterSignificant(tvals$fchange, tvals$pvaladj,rownames(tdatalog2),foldchangethresh = 0.5)

      #tmp <- plyr::rbind.fill(tmp)
      tmp$comparison <- paste(Ucond[i],"-", Ucond[j],sep="")
      significant[[length(significant) + 1]] <- tmp
    }
  }
}

all <- plyr::rbind.fill(res)
head(all)
cc <- data.frame(
  fc=c(0),
  p = c(0.05), 
  Area = c('q-value=0.05')
)

head(all)
quantable::multigroupVolcano(all,effect = "foldchange" ,type="pvals.adj",
                             condition ="comparison", 
                             label="label", xintercept=c(-1,1) )

significant<-lapply(significant,function(x){if(class(x) == "data.frame"){x}})

res <- do.call("rbind",res)
write.table(res, file="output/pvalsSRM.txt", quote=FALSE, sep="\t")
significant <- do.call("rbind", significant)
write.table(significant, file="output/significantSRM.txt", quote=FALSE, sep="\t")

p-values

par(mfrow=c(ceiling(nrpics/2),2))
res<-list()
plots <- list()
tvaluesRES <- list()
for(i in 1:length(Ucond)){
  if(i < length(Ucond)){
    for(j in (i+1):length(Ucond)){
      cat(" i:", Ucond[i], " j:", Ucond[j], "\n")
      iidx <- grep(Ucond[i], protData$getWideFormat()$Condition)
      jidx <- grep(Ucond[j], protData$getWideFormat()$Condition)
      tvals <- getTValuesForVolcano(tdatalog2[,iidx], tdatalog2[,jidx] )
      res[[length(res)+1]] <- data.frame(comparison = paste(Ucond[i],"_", Ucond[j],sep=""),
                                         label = rownames(tdatalog2),
                                         pvals=tvals$pval ,
                                         foldchange=tvals$fchange,
                                         pvals.adj = tvals$pvaladj )

      tvaluesRES[[paste( Ucond[i], "-", Ucond[j],sep="")]] <- tvals

      p<-volcano2G( tvals$fchange, tvals$pvaladj,labels = rownames(tdatalog2), log2FCThresh = 1 , pthresh=0.05,
                    xlab=paste( Ucond[i], "-", Ucond[j],sep=""), ylab="-log10(P.adjusted)")
      plots[[length(plots)+1]] <- p
      print(p)
    }
  }
}

protViz/SRMService documentation built on Nov. 13, 2021, 9:58 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

protViz/SRMService
Report Quantitative Mass Spectrometry Data

In protViz/SRMService: Report Quantitative Mass Spectrometry Data

Overview Heatmap

Overview Heatmap Scaled

How to do the variables correlate

How do the bio replicates correlate

Violin Plot

Bioreplicate

Variables

PCA

Variable Selection

Values of coefficient

Visualize distribution of relevant features

Predict the outcomes

first part

second part

Two group analysis

Adjusted p-values

p-values

R Package Documentation

Browse R Packages

We want your feedback!

protViz/SRMService Report Quantitative Mass Spectrometry Data

In protViz/SRMService: Report Quantitative Mass Spectrometry Data

Overview Heatmap

Overview Heatmap Scaled

How to do the variables correlate

How do the bio replicates correlate

Violin Plot

Bioreplicate

Variables

PCA

Variable Selection

Values of coefficient

Visualize distribution of relevant features

Predict the outcomes

first part

second part

Two group analysis

Adjusted p-values

p-values

R Package Documentation

Browse R Packages

We want your feedback!

protViz/SRMService
Report Quantitative Mass Spectrometry Data