knitr::opts_chunk$set(fig.width=8, fig.height=8, fig.path='Figs/', echo=FALSE, warning=FALSE, message=FALSE)
barplot(table(protData$getWideFormat()$Condition))
Number of samples per group.
library(lattice) library(reshape2) library(RColorBrewer) library(quantable) library(ggplot2) library(reshape2) library(glmnet) library(lattice) library(plyr) bwplot(Intensity ~ Condition | Protein, data=protData$data,scales=list(x=list(rot=90),relation="free") )
Distribution of protein fold changes per group.
colors <- rep("", nrow(protData$getWideFormat())) Ucond <- unique(protData$getWideFormat()$Condition) mypalette <- brewer.pal(length(Ucond),"Dark2") for(i in 1:length(Ucond)){ colors[protData$getWideFormat()$Condition==Ucond[i]]<-mypalette[i] } pch <- colors for(i in 1:length(Ucond)){ pch[protData$getWideFormat()$Condition==Ucond[i]]<-i } pch <- as.numeric(pch) plot(1,type="n", axes = FALSE,xlab="", ylab="") legend(1,1, legend=Ucond,cex=1,pch=1:length(Ucond),lwd=4,lty=1,col=mypalette, xjust=0.5, yjust=0.5)
The variable selection algorithm does not work with missing values therefore we replace them with the median fold change of a protein. A possible improvement would be to replace them with the median protein intensity of protein within a group.
simpleheatmap( protData$getIntensities() , palette = getBlueWhiteRed(21),ylab="Ig", main="",RowSideColors=colors)
simpleheatmap( scale(protData$getIntensities()) , palette = getBlueWhiteRed(21), main="",RowSideColors=colors)
simpleheatmap( cor((protData$getIntensities()), use="pairwise.complete.obs", method="spearman")^2 , palette = getGreensScale(21),xlab="Ig",ylab="Ig", main="R^2")
mat <- (cor(t(protData$getIntensities()),use="pairwise.complete.obs", method="spearman"))^2 simpleheatmap(mat,RowSideColors=colors, xlab="patient", ylab="patient", main="R^2",palette = getGreensScale(21))
p <- qplot( Run , Intensity , data=protData$data , geom="violin" , xlab="" , ylab="log10(I)") p + stat_summary(fun.y=median,geom='point') +theme(axis.text.x = element_text(angle = 90, hjust = 1))
p <- qplot( Protein , Intensity , data=protData$data , geom="violin" , xlab="" , ylab="log10(I)") p + stat_summary(fun.y=median,geom='point') +theme(axis.text.x = element_text(angle = 90, hjust = 1))
#datalog2 <- scale(datalog2) sum(is.na(unlist(protData$getIntensitiesNoMissing()))) #colnames(datalog2)<-paste("p", colnames(datalog2), sep=".") fmla <- as.formula(paste(" ~ ", paste(colnames(protData$getIntensitiesNoMissing()), collapse= "+"))) ir.pca <- prcomp( fmla, data=data.frame(protData$getIntensitiesNoMissing()), center = TRUE, scale. = TRUE, na.action=na.omit) par(mfrow=c(3,1)) plot(ir.pca$x[,1:2],col=colors,pch=pch) legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond)) plot(ir.pca$x[,2:3],col=colors,pch=pch) legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond)) plot(ir.pca$x[,3:4],col=colors,pch=pch) legend("topleft",legend=Ucond,col=c(mypalette),pch=1:length(Ucond))
disease <- as.factor(protData$getWideFormat()$Condition) mylambda <- 1/(1:25)^1.7 datalog2 <- as.matrix(protData$getIntensitiesNoMissing()) glmmod <- glmnet(datalog2,y=disease,family='multinomial', lambda =mylambda,maxit= 1000000)
cv.glmmod <- cv.glmnet(datalog2, y = disease, alpha = 1, family='multinomial', nfolds=20, lambda=mylambda ) plot(cv.glmmod) best_lambda <- cv.glmmod$lambda.min abline(v=log(best_lambda)) abline(v=log(best_lambda),col=2)
i<-1 for(i in 1:length(glmmod$beta)){ imageWithLabels(as.matrix(t(glmmod$beta[[i]])) , row.labels = round(mylambda,digits=3), col=getBlueWhiteRed(151),zlim=c(-2,2),main=names(glmmod$beta)[i],ylab="Ig", xlab="L1") abline(v=(which(best_lambda==mylambda))/(length(mylambda))) }
co <- coef(glmmod,s=best_lambda) resCoeff <-list() for(i in 1:length(co)){ dat <- as.matrix(co[[i]]) resCoeff[[length(resCoeff)+1]] <- data.frame(Condition=names(co)[i], features = rownames(dat) , Coefficients = dat[,1]) } resCoeff <- do.call("rbind",resCoeff) barchart(Coefficients ~ features | Condition , data=resCoeff ,origin=0,scales=list(x=list(rot=90))) predictors <- list() for( i in 1:length(co) ) { yvals <- (co[[i]][,1]) prednames <- names(yvals) predictors[[names(co)[i]]]<- data.frame(names = prednames, values= yvals) } bound <- do.call("rbind", predictors) bound <- cbind(gsub("\\..*$","",rownames(bound)), bound ) colnames(bound)[1]<-"Condition" bound <- bound[abs(bound$values) > 0,] head(bound)
\newpage
coefficientTable <- dcast(names ~ Condition, data=bound, value.var = "values") knitr::kable(coefficientTable )
Values of coefficients in the model.
rownames(coefficientTable) <- coefficientTable[,1] coefficientTable <- coefficientTable[,2:ncol(coefficientTable)] imageWithLabels(t(coefficientTable),col = getBlueWhiteRed(51),zlim=c(-8,8))
Same table shown with coefficients color coded for better readability.
rnam <- rownames(coefficientTable)[2:nrow(coefficientTable)] variables <- protData$getWideFormat()[, rnam] variables<-cbind(protData$getWideFormat()[,1:2],variables) xx <- melt(variables) bwplot(value ~ Condition | variable, data=xx,scales=list(x=list(rot=90),relation="free") )
classPred <- predict(glmmod, datalog2, s=best_lambda, type="class") check.model.accuracy <- function(predicted.class, actual.class){ result.tbl <- as.data.frame(table(predicted.class,actual.class ) ) colnames(result.tbl)[1:2] <- c("Pred","Act") F.score.row <- vector(length(unique(result.tbl$Pred)), mode = "list") for (cntr in 1:length(unique(result.tbl$Pred)) ){ pred.class <- unique(result.tbl$Pred)[cntr] tp <- sum(result.tbl[result.tbl$Pred==pred.class & result.tbl$Act==pred.class, "Freq"]) tp.fp <- sum(result.tbl[result.tbl$Pred == pred.class , "Freq" ]) tp.fn <- sum(result.tbl[result.tbl$Act == pred.class , "Freq" ]) precision <- tp/tp.fp recovery <- tp/tp.fn F.score <- 2*precision*recovery/(precision+recovery) res <- data.frame("pred.class" = as.character(pred.class), "precision" = precision, "recovery" = recovery, "F.score" = F.score , stringsAsFactors = FALSE) F.score.row[[cntr]] <- res } return(plyr::rbind.fill(F.score.row)) } res <- check.model.accuracy(classPred,as.character(disease))
knitr::kable(res, digits=rep(2,ncol(res)))
tab <-cbind(file = rownames(protData$getIntensitiesNoMissing()), truth=as.character(disease) , prediction = classPred) knitr::kable(tab[1:min(33, nrow(tab)),])
if(nrow(tab) > 34){ knitr::kable(tab[34:nrow(tab),]) }
xx<-table(disease, classPred) knitr::kable(xx)
Table columns : predicted category, Table rows : real category.
wrongPred <- round(sum(disease != classPred)/length(disease) * 100) pred <- rbind(c("Wrong predictions :",wrongPred ), c("Correct predictions :", 100 - wrongPred )) colnames(pred) <- c("", "%") knitr::kable(pred)
tdatalog2<-t( protData$getIntensities() ) Ucond <- as.character(Ucond) nrpics<-sum(1:(length(Ucond)-1)) par(mfrow=c(ceiling(nrpics/2),2)) res<-list() significant <- list() plots <- list() w <- 1 for(i in 1:length(Ucond)){ if(i < length(Ucond)){ for(j in (i+1):length(Ucond)){ cat(" i:", Ucond[i], " j:", Ucond[j], "\n") iidx <- grep(Ucond[i], protData$getWideFormat()$Condition) jidx <- grep(Ucond[j], protData$getWideFormat()$Condition) tvals <- getTValuesForVolcano(tdatalog2[,iidx], tdatalog2[,jidx] ) res[[length(res)+1]] <- data.frame(comparison = paste(Ucond[i],"-", Ucond[j],sep=""), label = rownames(tdatalog2), pvals=tvals$pval , group1Mean = apply(tdatalog2[,iidx],1, mean, na.rm=TRUE), group2Mean = apply(tdatalog2[,jidx],1, mean, na.rm=TRUE), foldchange=tvals$fchange, pvals.adj = tvals$pvaladj ) tmp <- data.frame(names = rownames(tdatalog2), tvals) #tmp <- quantable::filterSignificant(tvals$fchange, tvals$pvaladj,rownames(tdatalog2),foldchangethresh = 0.5) #tmp <- plyr::rbind.fill(tmp) tmp$comparison <- paste(Ucond[i],"-", Ucond[j],sep="") significant[[length(significant) + 1]] <- tmp } } } all <- plyr::rbind.fill(res) head(all) cc <- data.frame( fc=c(0), p = c(0.05), Area = c('q-value=0.05') )
head(all) quantable::multigroupVolcano(all,effect = "foldchange" ,type="pvals.adj", condition ="comparison", label="label", xintercept=c(-1,1) )
significant<-lapply(significant,function(x){if(class(x) == "data.frame"){x}}) res <- do.call("rbind",res) write.table(res, file="output/pvalsSRM.txt", quote=FALSE, sep="\t") significant <- do.call("rbind", significant) write.table(significant, file="output/significantSRM.txt", quote=FALSE, sep="\t")
par(mfrow=c(ceiling(nrpics/2),2)) res<-list() plots <- list() tvaluesRES <- list() for(i in 1:length(Ucond)){ if(i < length(Ucond)){ for(j in (i+1):length(Ucond)){ cat(" i:", Ucond[i], " j:", Ucond[j], "\n") iidx <- grep(Ucond[i], protData$getWideFormat()$Condition) jidx <- grep(Ucond[j], protData$getWideFormat()$Condition) tvals <- getTValuesForVolcano(tdatalog2[,iidx], tdatalog2[,jidx] ) res[[length(res)+1]] <- data.frame(comparison = paste(Ucond[i],"_", Ucond[j],sep=""), label = rownames(tdatalog2), pvals=tvals$pval , foldchange=tvals$fchange, pvals.adj = tvals$pvaladj ) tvaluesRES[[paste( Ucond[i], "-", Ucond[j],sep="")]] <- tvals p<-volcano2G( tvals$fchange, tvals$pvaladj,labels = rownames(tdatalog2), log2FCThresh = 1 , pthresh=0.05, xlab=paste( Ucond[i], "-", Ucond[j],sep=""), ylab="-log10(P.adjusted)") plots[[length(plots)+1]] <- p print(p) } } }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.