knitr::opts_chunk$set(fig.width=8, fig.height=8, fig.path='Figs/',
                      echo=FALSE, warning=FALSE, message=FALSE)
barplot(table(protData$getWideFormat()$Condition))

Number of samples per group.

library(lattice)
library(reshape2)
library(RColorBrewer)
library(quantable)
library(ggplot2)
library(reshape2)
library(glmnet)
library(lattice)
library(plyr)


bwplot(Intensity ~ Condition | Protein, data=protData$data,scales=list(x=list(rot=90),relation="free") ) 

Distribution of protein fold changes per group.

colors <- rep("", nrow(protData$getWideFormat()))

Ucond <- unique(protData$getWideFormat()$Condition)
mypalette <- brewer.pal(length(Ucond),"Dark2")

for(i in 1:length(Ucond)){
  colors[protData$getWideFormat()$Condition==Ucond[i]]<-mypalette[i]
}

pch <- colors

for(i in 1:length(Ucond)){
  pch[protData$getWideFormat()$Condition==Ucond[i]]<-i
}

pch <- as.numeric(pch)
plot(1,type="n", axes = FALSE,xlab="", ylab="")
legend(1,1, legend=Ucond,cex=1,pch=1:length(Ucond),lwd=4,lty=1,col=mypalette, xjust=0.5, yjust=0.5)

The variable selection algorithm does not work with missing values therefore we replace them with the median fold change of a protein. A possible improvement would be to replace them with the median protein intensity of protein within a group.

Overview Heatmap

simpleheatmap( protData$getIntensities() , palette = getBlueWhiteRed(21),ylab="Ig", main="",RowSideColors=colors)

Overview Heatmap Scaled

simpleheatmap( scale(protData$getIntensities()) , palette = getBlueWhiteRed(21), main="",RowSideColors=colors)

How to do the variables correlate

simpleheatmap( cor((protData$getIntensities()), use="pairwise.complete.obs", method="spearman")^2 , palette = getGreensScale(21),xlab="Ig",ylab="Ig", main="R^2")

How do the bio replicates correlate

mat <- (cor(t(protData$getIntensities()),use="pairwise.complete.obs", method="spearman"))^2

simpleheatmap(mat,RowSideColors=colors, xlab="patient", ylab="patient", main="R^2",palette = getGreensScale(21))

Violin Plot

Bioreplicate

p <- qplot( Run , Intensity , data=protData$data , geom="violin" , xlab="" , ylab="log10(I)")
p + stat_summary(fun.y=median,geom='point') +theme(axis.text.x = element_text(angle = 90, hjust = 1))

Variables

p <- qplot( Protein , Intensity , data=protData$data , geom="violin" , xlab="" , ylab="log10(I)")
p + stat_summary(fun.y=median,geom='point') +theme(axis.text.x = element_text(angle = 90, hjust = 1))

PCA

#datalog2 <- scale(datalog2)
sum(is.na(unlist(protData$getIntensitiesNoMissing())))
#colnames(datalog2)<-paste("p", colnames(datalog2), sep=".")

fmla <- as.formula(paste(" ~ ", paste(colnames(protData$getIntensitiesNoMissing()), collapse= "+")))

ir.pca <- prcomp( fmla, data=data.frame(protData$getIntensitiesNoMissing()), center = TRUE, scale. = TRUE, na.action=na.omit)
par(mfrow=c(3,1))

plot(ir.pca$x[,1:2],col=colors,pch=pch)
legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond))

plot(ir.pca$x[,2:3],col=colors,pch=pch)
legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond))

plot(ir.pca$x[,3:4],col=colors,pch=pch)
legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond))

Variable Selection

disease <- as.factor(protData$getWideFormat()$Condition)
mylambda <- 1/(1:25)^1.7
datalog2 <- as.matrix(protData$getIntensitiesNoMissing())

glmmod <- glmnet(datalog2,y=disease,family='multinomial', lambda =mylambda,maxit= 1000000)
cv.glmmod <- cv.glmnet(datalog2, y = disease, alpha = 1, family='multinomial', nfolds=20, lambda=mylambda )
plot(cv.glmmod)
best_lambda <- cv.glmmod$lambda.min
abline(v=log(best_lambda))
abline(v=log(best_lambda),col=2)
i<-1
for(i in 1:length(glmmod$beta)){
  imageWithLabels(as.matrix(t(glmmod$beta[[i]])) , row.labels = round(mylambda,digits=3),
                  col=getBlueWhiteRed(151),zlim=c(-2,2),main=names(glmmod$beta)[i],ylab="Ig", xlab="L1")

  abline(v=(which(best_lambda==mylambda))/(length(mylambda)))
}
co <- coef(glmmod,s=best_lambda)

resCoeff <-list()
for(i in 1:length(co)){
  dat <- as.matrix(co[[i]])
  resCoeff[[length(resCoeff)+1]] <- data.frame(Condition=names(co)[i], features = rownames(dat) , Coefficients = dat[,1])
}
resCoeff <- do.call("rbind",resCoeff)

barchart(Coefficients ~ features | Condition , data=resCoeff ,origin=0,scales=list(x=list(rot=90)))

predictors <- list()

for( i in 1:length(co) ) {
  yvals <- (co[[i]][,1])
  prednames <- names(yvals)
  predictors[[names(co)[i]]]<- data.frame(names = prednames, values= yvals)
}

bound <- do.call("rbind", predictors)


bound <- cbind(gsub("\\..*$","",rownames(bound)), bound )
colnames(bound)[1]<-"Condition"


bound <- bound[abs(bound$values) > 0,]
head(bound)

\newpage

Values of coefficient

coefficientTable <- dcast(names ~ Condition, data=bound, value.var = "values")
knitr::kable(coefficientTable )

Values of coefficients in the model.

rownames(coefficientTable) <- coefficientTable[,1]
coefficientTable <- coefficientTable[,2:ncol(coefficientTable)]
imageWithLabels(t(coefficientTable),col = getBlueWhiteRed(51),zlim=c(-8,8))

Same table shown with coefficients color coded for better readability.

Visualize distribution of relevant features

rnam <- rownames(coefficientTable)[2:nrow(coefficientTable)]
variables <- protData$getWideFormat()[, rnam]

variables<-cbind(protData$getWideFormat()[,1:2],variables)
xx <- melt(variables)

bwplot(value ~ Condition | variable, data=xx,scales=list(x=list(rot=90),relation="free") )

Predict the outcomes

classPred <- predict(glmmod, datalog2, s=best_lambda, type="class")



check.model.accuracy <- function(predicted.class, actual.class){
  result.tbl <- as.data.frame(table(predicted.class,actual.class ) ) 
  colnames(result.tbl)[1:2] <- c("Pred","Act")

  F.score.row <- vector(length(unique(result.tbl$Pred)), mode = "list")

  for (cntr in 1:length(unique(result.tbl$Pred)) ){
    pred.class <- unique(result.tbl$Pred)[cntr]
    tp <- sum(result.tbl[result.tbl$Pred==pred.class & result.tbl$Act==pred.class, "Freq"])
    tp.fp <- sum(result.tbl[result.tbl$Pred == pred.class , "Freq" ])
    tp.fn <- sum(result.tbl[result.tbl$Act == pred.class , "Freq" ])
    precision <- tp/tp.fp 
    recovery <- tp/tp.fn
    F.score <- 2*precision*recovery/(precision+recovery)

    res <- data.frame("pred.class" = as.character(pred.class),
                      "precision" = precision,
                      "recovery" = recovery,
                      "F.score" = F.score , stringsAsFactors = FALSE)
    F.score.row[[cntr]] <- res
  }

  return(plyr::rbind.fill(F.score.row))
}

res <- check.model.accuracy(classPred,as.character(disease))
knitr::kable(res, digits=rep(2,ncol(res)))

first part

tab <-cbind(file = rownames(protData$getIntensitiesNoMissing()), truth=as.character(disease) , prediction = classPred)
knitr::kable(tab[1:min(33, nrow(tab)),])

second part

if(nrow(tab) > 34){
  knitr::kable(tab[34:nrow(tab),])
}
xx<-table(disease, classPred)
knitr::kable(xx)

Table columns : predicted category, Table rows : real category.

wrongPred <- round(sum(disease != classPred)/length(disease) * 100)
pred <- rbind(c("Wrong predictions :",wrongPred ),
              c("Correct predictions :", 100 - wrongPred ))

colnames(pred) <- c("", "%")
knitr::kable(pred)

Two group analysis

Adjusted p-values

tdatalog2<-t( protData$getIntensities() )
Ucond <- as.character(Ucond)

nrpics<-sum(1:(length(Ucond)-1))
par(mfrow=c(ceiling(nrpics/2),2))
res<-list()
significant <- list()
plots <- list()
w <- 1

for(i in 1:length(Ucond)){
  if(i < length(Ucond)){

    for(j in (i+1):length(Ucond)){
      cat(" i:", Ucond[i], " j:", Ucond[j], "\n")
      iidx <- grep(Ucond[i], protData$getWideFormat()$Condition)
      jidx <- grep(Ucond[j], protData$getWideFormat()$Condition)
      tvals <- getTValuesForVolcano(tdatalog2[,iidx], tdatalog2[,jidx] )
      res[[length(res)+1]] <- data.frame(comparison = paste(Ucond[i],"-", Ucond[j],sep=""),
                                         label = rownames(tdatalog2),
                                         pvals=tvals$pval ,
                                         group1Mean = apply(tdatalog2[,iidx],1, mean, na.rm=TRUE),
                                         group2Mean = apply(tdatalog2[,jidx],1, mean, na.rm=TRUE),

                                         foldchange=tvals$fchange,
                                         pvals.adj = tvals$pvaladj )
      tmp <- data.frame(names = rownames(tdatalog2), tvals)
      #tmp <- quantable::filterSignificant(tvals$fchange, tvals$pvaladj,rownames(tdatalog2),foldchangethresh = 0.5)

      #tmp <- plyr::rbind.fill(tmp)
      tmp$comparison <- paste(Ucond[i],"-", Ucond[j],sep="")
      significant[[length(significant) + 1]] <- tmp
    }
  }
}

all <- plyr::rbind.fill(res)
head(all)
cc <- data.frame(
  fc=c(0),
  p = c(0.05), 
  Area = c('q-value=0.05')
)
head(all)
quantable::multigroupVolcano(all,effect = "foldchange" ,type="pvals.adj",
                             condition ="comparison", 
                             label="label", xintercept=c(-1,1) )
significant<-lapply(significant,function(x){if(class(x) == "data.frame"){x}})

res <- do.call("rbind",res)
write.table(res, file="output/pvalsSRM.txt", quote=FALSE, sep="\t")
significant <- do.call("rbind", significant)
write.table(significant, file="output/significantSRM.txt", quote=FALSE, sep="\t")

p-values

par(mfrow=c(ceiling(nrpics/2),2))
res<-list()
plots <- list()
tvaluesRES <- list()
for(i in 1:length(Ucond)){
  if(i < length(Ucond)){
    for(j in (i+1):length(Ucond)){
      cat(" i:", Ucond[i], " j:", Ucond[j], "\n")
      iidx <- grep(Ucond[i], protData$getWideFormat()$Condition)
      jidx <- grep(Ucond[j], protData$getWideFormat()$Condition)
      tvals <- getTValuesForVolcano(tdatalog2[,iidx], tdatalog2[,jidx] )
      res[[length(res)+1]] <- data.frame(comparison = paste(Ucond[i],"_", Ucond[j],sep=""),
                                         label = rownames(tdatalog2),
                                         pvals=tvals$pval ,
                                         foldchange=tvals$fchange,
                                         pvals.adj = tvals$pvaladj )

      tvaluesRES[[paste( Ucond[i], "-", Ucond[j],sep="")]] <- tvals

      p<-volcano2G( tvals$fchange, tvals$pvaladj,labels = rownames(tdatalog2), log2FCThresh = 1 , pthresh=0.05,
                    xlab=paste( Ucond[i], "-", Ucond[j],sep=""), ylab="-log10(P.adjusted)")
      plots[[length(plots)+1]] <- p
      print(p)
    }
  }
}


protViz/SRMService documentation built on Nov. 13, 2021, 9:58 a.m.