library(glmnet) library(reshape2) library(survival) library(tidyverse) library(Vennerable) library(glmSparseNet) library(loose.rock) library(futile.logger)
devtools::load_all() .Last.value <- base.dir(path = '/ssd_home/averissimo/work/rpackages/network.cox-cache') .Last.value <- show.message(FALSE) .Last.value <- flog.layout(layout.format('[~l] ~m')) .Last.value <- flog.appender(appender.tee('logger.txt')) theme_set(theme_minimal())
show.params(params)
my.data <- run.cache(prepare.tcga.survival.data.old, 'brca', 'primary.solid.tumor', # #input.type = 'rna', normalization = params$normalize, log2.pre.normalize = params$log2, # include.negative.survival = FALSE, handle.duplicates = 'keep_first', coding.genes = TRUE, # cache.prefix = 'tcga-data', show.message = TRUE) # clinical <- my.data$clinical # xdata <- my.data$xdata ydata <- my.data$ydata xdata.raw <- my.data$xdata.raw # # xdata.digest.cache <- my.data$xdata.digest xdata.raw.digest.cache <- my.data$xdata.raw.digest ydata.digest.cache <- my.data$ydata.digest # rm(my.data)
note: This is a fixed result, regardless of seed parameter in report.
set.seed(987654321) sample(seq(ncol(xdata)), 9) %>% { xdata[,.] } %>% melt %>% ggplot() + geom_density(aes(value, color = X2)) + #geom_freqpoly(aes(value, color = Var2), bins = 100) + facet_wrap(~ X2, ncol = 3, scales = 'free') + theme(legend.position = 'none') set.seed(987654321) sample(seq(ncol(xdata)), 9) %>% { xdata[,.] } %>% melt %>% ggplot() + #geom_density(aes(value, color = Var2)) + geom_freqpoly(aes(value, color = X2), bins = 100) + facet_wrap(~ X2, ncol = 3, scales = 'free') + theme(legend.position = 'none')
set.seed(params$seed) ixs <- balanced.train.and.test(which(ydata$status), which(!ydata$status), train.perc = params$train.percentage) xdata.test <- xdata[ixs$test,] ydata.test <- ydata[ixs$test,] # xdata.train <- xdata[ixs$train,] ydata.train <- ydata[ixs$train,] xdata.train.digest <- glmSparseNet::digest.cache(xdata.train)
## Using Network or Not # # # # default using string from degree.type network <- params$network if (params$network == 'oldie') { # # Load old matrix used in SPARSA conference my.env <- new.env() load(sprintf('/ssd_home/averissimo/work/rpackages/brca.analysis/data/degree-%.6f.RData', .4), envir = my.env) network <- my.env$degree } else if (params$network == 'string') { # # Load degree of STRING network downloaded from external sources network <- run.cache(string.db.homo.sapiens) %>% { run.cache(build.string.network, ., use.names = 'ensembl') } %>% { .@x[.@x <= params$network.cutoff] <- 0 if (TRUE) { .@x <- (.@x != 0) * 1 } Matrix::colSums(.) + Matrix::rowSums(.) } # # Adds missing genes as nodes with 0 degree network <- colnames(xdata.raw)[!colnames(xdata.raw) %in% names(network)] %>% { c(network, array(0, length(.), dimnames = list(.))) } %>% { .[colnames(xdata.raw)] } } else if (params$network == 'string.pedro') { # # Load degree of STRING network downloaded from external sources my.env <- new.env() load('/ssd_home/averissimo/work/rpackages/network.cox-big.files/saves/degree-string.RData', envir = my.env) ix.names <- colnames(xdata) %in% names(my.env$degree_uw) network <- my.env$degree_uw[colnames(xdata)] } # display Network if (is.character(network)) { flog.info('Network to be used: %s', network) } else { qplot(network, geom = 'freqpoly', bins = 100) + ggtitle(sprintf('Degree of nodes in %s network', params$network)) }
# Transformation function for degree before used as a penalty if (params$network.trans == 'none') { network.trans <- function(x) { return(x) } } else if (params$network.trans == 'hub') { network.trans <- glmSparseNet::hub.heuristic } else if (params$network.trans == 'hub') { network.trans <- glmSparseNet::orphan.heuristic } else { stop('Error: network.trans parameter not recognized.') }
standardize = FALSE
set.seed(params$seed) if (params$foldid) { foldid <- balanced.cv.folds(ydata.train$status)$output } else { foldid <- sample(rep(seq(10), length = nrow(ydata.train))) } set.seed(params$seed) fit.train.nosd <- run.cache(cv.glmSparseNet.mclapply, xdata.train, Surv(ydata.train$time, ydata.train$status), network = network, family = 'cox', alpha = params$alpha, # network.options = network.options.default(trans.fun = network.trans, min.degree = .2, n.cores = params$n.cores, cutoff = params$network.cutoff), # foldid = foldid, mc.cores = params$n.cores, standardize = FALSE, cache.digest = list(xdata.train.digest))
standardize = TRUE
set.seed(params$seed) if (params$foldid) { foldid <- balanced.cv.folds(ydata.train$status)$output } else { foldid <- sample(rep(seq(10), length = nrow(ydata.train))) } set.seed(params$seed) fit.train.sd <- run.cache(cv.glmSparseNet.mclapply, xdata.train, Surv(ydata.train$time, ydata.train$status), network = network, family = 'cox', alpha = params$alpha, # network.options = network.options.default(trans.fun = network.trans, min.degree = .2, n.cores = params$n.cores, cutoff = params$network.cutoff), # foldid = foldid, mc.cores = params$n.cores, standardize = TRUE, cache.digest = list(xdata.train.digest))
standardize = FALSE
)plot(fit.train.nosd)
standardize = FALSE
)plot(fit.train.sd)
best.model.nosd <- coef(fit.train.nosd, s = 'lambda.min')[,1] %>% { .[.!=0] } best.model.sd <- coef(fit.train.sd, s = 'lambda.min')[,1] %>% { .[.!=0] }
c(No.Standadize = length(best.model.nosd), Standardize = length(best.model.sd))
Overlapping features/genes
if(length(best.model.nosd) * length(best.model.sd) * 1 > 0) { vv <- Venn(list(standardize = names(best.model.nosd), non.standarize = names(best.model.sd))) %>% compute.Venn(type = 'squares') gp <- VennThemes(vv) plot(vv, gp = gp) } else { flog.warn('One of the models did not select any features, see section "Selected features"') }
plot.list <- list() if(length(best.model.nosd) > 0) { plot.list <- c(plot.list, list(separate2GroupsCox(best.model.nosd, xdata.train[, names(best.model.nosd)], ydata.train, plot.title = 'standardize = FALSE')$plot)) } else { flog.warn('Model without standardize does not select any variables') } if(length(best.model.sd) > 0) { plot.list <- c(plot.list, list(separate2GroupsCox(best.model.sd, xdata.train[, names(best.model.sd)], ydata.train, plot.title = 'standardize = TRUE')$plot)) } else { flog.warn('Model with standardize does not select any variables') } if (length(plot.list) > 0) multiplot(plotlist = plot.list, ncol = 2)
plot.list <- list() if(length(best.model.nosd) > 0) { plot.list <- c(plot.list, list(separate2GroupsCox(best.model.nosd, xdata.test[, names(best.model.nosd)], ydata.test, plot.title = 'standardize = FALSE')$plot)) } else { flog.warn('Model without standardize does not select any variables') } if(length(best.model.sd) > 0) { plot.list <- c(plot.list, list(separate2GroupsCox(best.model.sd, xdata.test[, names(best.model.sd)], ydata.test, plot.title = 'standardize = TRUE')$plot)) } else { flog.warn('Model with standardize does not select any variables') } if (length(plot.list) > 0) multiplot(plotlist = plot.list, ncol = 2)
# # Use to generate multiple reporst # render.me <- function(my.list, default.list = list(alpha = .7, foldid = FALSE, n.cores = 10, seed = 1985, train.percentage = .7, log2 = FALSE, normalize = 'none', network = 'none', network.cutoff = 0, network.trans = 'none')) { my.list <- taRifx::merge.list(my.list, default.list) lapply(c('center', 'max', 'none'), function(ix) { return(tibble(normalize = c(ix, ix), log2 = c(TRUE,FALSE))) }) %>% melt(id.vars = c('normalize', 'log2')) %>% dplyr::select(1:2) %>% { for(line.ix in seq(nrow(.))) { taRifx::merge.list(list(normalize = .[line.ix, 'normalize'], log2 = .[line.ix, 'log2']), my.list) %>% { .$foldid <- TRUE output_file <- sprintf('baseline_reports/network_%s--cutoff_%.2f--trans_%s--normalize_%s--log2_%s--foldid_%s--seed_%d.html', .$network, .$network.cutoff, .$network.trans, .$normalize, .$log2, .$foldid, .$seed) tryCatch(rmarkdown::render('baseline.Rmd', params = ., output_file = output_file)) # .$foldid <- FALSE # output_file <- sprintf('baseline_reports/network_%s--cutoff_%.2f--trans_%s--normalize_%s--log2_%s--foldid_%s--seed_%d.html', # .$network, .$network.cutoff, .$network.trans, .$normalize, .$log2, .$foldid, .$seed) # tryCatch(rmarkdown::render('baseline.Rmd', params = ., output_file = output_file)) } } } } for(ix.seed in c(1985*2, 1985)) { # tryCatch(render.me(list(seed = ix.seed, network = 'none'))) tryCatch(render.me(list(seed = ix.seed, network.trans = 'hub', network = 'string'))) # tryCatch(render.me(list(seed = ix.seed, network.trans = 'orphan', network = 'string'))) #tryCatch(render.me(list(seed = ix.seed, network.trans = 'hub', network = 'correlation', network.cutoff = .6))) #tryCatch(render.me(list(seed = ix.seed, network.trans = 'hub', network = 'covariance', network.cutoff = .1))) #tryCatch(render.me(list(seed = ix.seed, network.trans = 'hub', network = 'string.pedro'))) #tryCatch(render.me(list(seed = ix.seed, network.trans = 'orphan', network = 'correlation', network.cutoff = .6))) #tryCatch(render.me(list(seed = ix.seed, network.trans = 'orphan', network = 'covariance', network.cutoff = .1))) #tryCatch(render.me(list(seed = ix.seed, network.trans = 'orphan', network = 'string.pedro'))) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.