knitr::opts_chunk$set(echo = FALSE, warnings=FALSE, fig.width = 8, fig.height = 8, message = FALSE)
library(plyr) library(dplyr) library(ggplot2) library(ggrepel) library(ggplot2) nvariables <-2 wprot <- protData$getWideNoMissing() wprot <- scale(wprot) tmp <- unique(subset(protData$data, select = c("Condition","Run"))) wprot <- merge(tmp, wprot, by.x="Run", by.y="row.names") p2grp <- wprot iprotV <- data.frame(p2grp[,3:ncol(p2grp)]) Condition = as.numeric(grepl( Condition2Compare, p2grp$Condition)) protnames <- as.character(colnames(iprotV)) Variables <- t(combn(as.character(colnames(iprotV)) , nvariables , FUN = NULL, simplify = TRUE )) colnames(Variables) <- paste("v",1:ncol(Variables),sep="") modelData <- alply(Variables, 1, function(x, iprotV2 = iprotV, Condition2 = Condition){ tmp <- data.frame(Condition = as.factor(Condition2), select(iprotV2, x)) tmp }, iprotV ) resModels <- llply(modelData, function(x){ x <- glm(Condition ~ ., data= x , family = "binomial") x } ) coefficientsExtrat <- function(x){c( coefficients = coefficients(x), deviance = deviance(x) )} resCoefficients <- ldply(resModels,coefficientsExtrat) colnames(resCoefficients) <- c("mID", "c.intercept", paste("c", 1:ncol(Variables), sep=""), "deviance") makeAUC <- function(x, ConditionX ){ x <- predict(x, type="response") pROC::auc(controls = x[ConditionX == 0 ] , cases = x[ConditionX == 1]) } resAUC <- rep(NA, length(resModels)) for(i in 1:length(resModels)){ resAUC[i] <- makeAUC(resModels[[i]], ConditionX = Condition) } resultsCoeff <-data.frame(var = Variables, resCoefficients, AUC= resAUC ) resultsCoeff <- arrange(resultsCoeff, desc(AUC)) nrmodels <-choose(length(unique(protData$data$Protein)), nvariables)
The total number of models is :
nrmodels
.
Histogram of AUC for models using 3 variables.
model <- resModels[[resultsCoeff$mID[1]]] tmp <- melt(model$data) x <- predict(resModels[[resultsCoeff$mID[1]]], type="response") quantable::makeROCplot(x[Condition == 0 ] , x[Condition == 1])
p <- ggplot(tmp , aes(variable, value)) + geom_boxplot(aes(colour = Condition)) + geom_jitter(width = 0.2,aes(colour = Condition)) p
x <- predict(resModels[[resultsCoeff$mID[2]]], type="response") quantable::makeROCplot(x[Condition == 0 ] , x[Condition == 1])
x <- predict(resModels[[resultsCoeff$mID[3]]], type="response") quantable::makeROCplot(x[Condition == 0 ] , x[Condition == 1])
hist(resultsCoeff$AUC, breaks=20, xlab="AUC", main="AUC distribution") abline(v= quantile(resultsCoeff$AUC,0.9),col=2)
knitr::kable(resultsCoeff[1:20, ], digits=2)
Range of AUC values for top models:
top10pcModels <- resultsCoeff[1:(nrow(resCoefficients)/10),] range(top10pcModels$AUC)
plot(sort(table(as.character(unlist(top10pcModels[,1:nvariables]))),decreasing = T)[1:25],las=2,ylab="# of models") coefCounts <- data.frame(sort(table(as.character(unlist(top10pcModels[,1:nvariables]))),decreasing = T)) colnames(coefCounts) <- c("proteinID", "coefCounts") cofw <- data.frame(names = unlist(top10pcModels[,1:nvariables]) , coef = unlist(top10pcModels[,(nvariables+3):(nvariables+2+nvariables)])) cofw$abscoef<- abs(cofw$coef) coefMean <- aggregate(cofw$coef, list(proteinID=cofw$names), median) colnames(coefMean)[2] = "meanCoef" xd <- merge(coefMean, coefCounts, by="proteinID")
knitr::kable(arrange(xd, desc(abs(meanCoef))), caption = "Coefficients sorted by absolute size in top 10% of models.") res$coefficients <- arrange(xd, desc(abs(meanCoef)))
coefThreshold <- 0.5 ggplot(xd, aes(x=meanCoef, y=coefCounts)) + geom_point(shape=1) + geom_text_repel(data=dplyr::filter(xd, xd$meanCoef > coefThreshold | xd$meanCoef < -coefThreshold ), aes(label=proteinID)) xt <- xd[xd$meanCoef > coefThreshold| xd$meanCoef < -coefThreshold,]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.