knitr::opts_chunk$set(echo = TRUE, cache = TRUE, warning = FALSE, message = FALSE)
#library(devtools) #install_github("singha53/amritr") library(amritr) library(mixOmics) library(dplyr); library(tidyr);
data("pathways"); data("breast.TCGA") Y = breast.TCGA$data.train$subtype names(Y) <- rownames(breast.TCGA$data.train$mrna) Y = c(Y[Y == "Basal"][1:10], Y[Y == "Her2"][1:10]) Y[Y == 1] <- "Basal" Y[Y == 2] <- "Her2" Y <- factor(Y) set.seed(123) X = breast.TCGA$data.train$mrna[names(Y), sample(1:200, 30)] ## run biomarker pipeline for a binary response allPanels <- amritr::biomarkerPipeline(X = X, Y = Y, topranked = 30, validation = "Mfold", M = 2, iter = 2, threads = 2, progressBar = TRUE, pathways = pathways) allPanels %>% arrange(desc(Mean)) %>% head
allPanels %>% arrange(Mean) %>% mutate(Panel = 1:nrow(.)) %>% ggplot(aes(x = Panel, y = Mean, color = Type)) + geom_point() + geom_errorbar(aes(ymin = Mean-SD, ymax = Mean+SD)) + customTheme(sizeStripFont = 10, xAngle = 0, hjust = 0.5, vjust = 0.5, xSize = 10, ySize = 10, xAxisSize = 10, yAxisSize = 10) + ylab("AUC - 2x2-fold CV") + xlab("Panels") + geom_hline(yintercept = 0.5, linetype = "dashed")
(topPanel <- allPanels %>% arrange(desc(Mean)) %>% head %>% slice(1)) pcaX <- plsda(X[, unlist(strsplit(topPanel$Genes, "_"))], Y) plotIndiv(pcaX, group = Y, ellipse = TRUE, star = TRUE)
X.train <- breast.TCGA$data.train[c("mrna", "mirna")] Y.train <- breast.TCGA$data.train$subtype X.test <- breast.TCGA$data.test[c("mrna", "mirna")] Y.test <- breast.TCGA$data.test$subtype M = 2; iter = 2; cpus = 2; ## Build panel and estimate test performance result <- amritr::enet(X = do.call(cbind, X.train), Y = Y.train, alpha=1, family="multinomial", lambda = NULL, X.test = do.call(cbind, X.test), Y.test = Y.test, filter = "none", topranked = 50) lapply(X.train, function(i){ intersect(colnames(i), result$enet.panel) }) ## Estimate panel performance using cross-validation cv <- amritr::perf.enet(result, validation = "Mfold", M = M, iter = iter, threads = cpus, progressBar = FALSE) concat_enetErrTrain_tuneConcat <- filter(cv$perf, ErrName == "BER")[-1] concat_enetErrTest_tuneConcat <- c(result$perfTest["BER"], NA) names(concat_enetErrTest_tuneConcat) <- names(concat_enetErrTrain_tuneConcat) ## Concateantion panel error rate rbind(concat_enetErrTrain_tuneConcat, concat_enetErrTest_tuneConcat) %>% mutate(Set = c("Train", "Test"))
alphaList = list(1, 1) lambdaList = list(NULL, NULL) ## Build panel and estimate test performance ensembleMod <- ensembleEnet(X.train, Y.train, alphaList = alphaList, lambdaList = lambdaList, X.test = X.test, Y.test = Y.test, filter = "none", topranked = 50) ensembleResult <- ensembleMod$result %>% zip_nPure() ensemblePanel <- ensembleResult$enet.panel ensemblePanel ## Estimate panel performance using cross-validation ensembleTrain <- perfEnsemble(object=ensembleMod, validation = "Mfold", M = M, iter = iter, threads = cpus, progressBar = TRUE) ensemble_enetLength <- lapply(ensemblePanel, length) ensemble_enetErrTrain_tuneEnsemble <- filter(ensembleTrain$perf, ErrName == "BER")[-1] ensemble_enetErrTest_tuneEnsemble <- c(ensembleMod$perfTest["BER"], NA) names(ensemble_enetErrTest_tuneEnsemble) <- names(ensemble_enetErrTrain_tuneEnsemble) ## Ensemble panel error rate rbind(ensemble_enetErrTrain_tuneEnsemble, ensemble_enetErrTest_tuneEnsemble) %>% mutate(Set = c("Train", "Test"))
design <- setDesign(X.train, corCutOff = 0.6, plotMat = FALSE) ncomp <- nlevels(Y.train) - 1 list.keepX <- lapply(ensemble_enetLength, function(i){ rep(round(i/ncomp, 0), ncomp) }) TCGA.block.splsda = block.splsda(X = X.train, Y = Y.train, ncomp = ncomp, keepX = list.keepX, design = design, scheme = "centroid") diabloFeat <- lapply(TCGA.block.splsda$loadings[-(length(X.train)+1)], function(x) unique(as.character(as.matrix(apply(x, 2, function(i) names(i)[which(i != 0)]))))) diabloFeat ## training error cv <- perf(TCGA.block.splsda, validation = "Mfold", folds = M, cpus = cpus, nrepeat = iter) err <- extractErr(cv) err$Comp <- factor(err$Comp, levels = paste("comp", 1:ncomp, sep = ".")) diablo_enetErrTrain <- err %>% filter(Type == "centroids.dist", Class == "Overall.BER", EnsembleMode == "wMajVote", Dataset == "DIABLO", Comp == paste("comp", ncomp, sep = ".")) diablo_enetErrTrain <- diablo_enetErrTrain[c("meanErr", "sdErr")] ## test error diabloTest <- predict(TCGA.block.splsda, X.test, method = "all") diabloTestConsensus <- lapply(diabloTest$WeightedVote, function(i){ predY <- apply(i, 2, function(z){ temp <- table(factor(z, levels = levels(Y.test)), Y.test) diag(temp) <- 0 error = c(colSums(temp)/summary(Y.test), sum(temp)/length(Y.test), mean(colSums(temp)/summary(Y.test))) names(error) <- c(names(error)[1:nlevels(Y.test)], "ER", "BER") error }) }) diablo_enetErrTest <- c(diabloTestConsensus$centroids.dist["BER", paste("comp", ncomp, sep = "")], NA) names(diablo_enetErrTest) <- names(diablo_enetErrTrain) ## DIABLO panel error rate rbind(diablo_enetErrTrain, diablo_enetErrTest) %>% mutate(Set = c("Train", "Test"))
datList = list(ensemble = as.character(unlist(ensemblePanel)), diablo=as.character(unlist(diabloFeat))) amritr::venndiagram(datList = datList, circleNames = c("Ensemble", "DIABLO"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.