knitr::opts_chunk$set(echo = TRUE)
We perform bootstrap to assess the power of the variable selection.
library(Matrix) library(tidyverse) library(PintMF) library(CrIMMix) library(future) library(mclust)
Four un balanced groups are simulated.
nclust <- 4 nByclust= c(5, 10, 25, 10)
set.seed(55) c_1 <- simulateY(nclust = nclust, n_byClust = nByclust, J=1000, prop=0.05, noise=0.2) c_2 <- simulateY(nclust = nclust, n_byClust = nByclust, J=500, flavor="binary", params=list(c(p=0.6)), prop=0.01, noise=0.1) params_beta <- list(c(mean1=-1, mean2=1, sd1=0.5, sd2=0.5)) c_3 <- simulateY(nclust = nclust, n_byClust = nByclust, J=5000,flavor="beta", params=params_beta, prop=0.02, noise=0.1) data <- list(c_1$data, c_2$data, c_3$data) bootstrap <- lapply(1:sum(nByclust), function (ii){ data_ii <- lapply(data, function(dd) dd[-ii, ]) true.clust <- c_1$true.clusters data_names <- c("gaussian", "binary", "beta-like") names(data_ii) <- data_names data_t <- data_ii data_t[[3]] <- log2(data_ii[[3]]/(1-data_ii[[3]])) R_snf <- SolveInt(Y=data_t, p=4, max.it=5, verbose=FALSE, init_flavor="snf", flavor_mod="glmnet") })
count_1 <- lapply(bootstrap, function (bb) which(colSums(abs(bb$H[[1]]))!=0) %>% names) count_2 <- lapply(bootstrap, function (bb) which(colSums(abs(bb$H[[2]]))!=0) %>% names) count_3 <- lapply(bootstrap, function (bb) which(colSums(abs(bb$H[[3]]))!=0) %>% names)
d1 <- data.frame(count_1 %>% unlist %>% t %>% table, dat="gaussian") d2 <- data.frame(count_2 %>% unlist %>% t %>% table, dat="binary") d3 <- data.frame(count_3 %>% unlist %>% t %>% table, dat="beta-like") d <- rbind(d1, d2, d3) g <- d%>% ggplot(aes(y=Freq, x=dat))+geom_violin( fill="#999999")+theme_bw()+xlab("")+ylab("Count") g <- g+theme(axis.text = element_text(size=15), axis.title.y = element_text(size=15)) g
g %>% ggsave(filename="~/Figs_pintMF/bootstrap_simulation.eps", width=6, height=4)
setwd('../') library("parallel") library(PintMF) library(future.apply) library(dplyr) source("inst/Simulations_OMICSSMILA/00.setup.R")
k=1 data <- file[[k]][1:3] bootstrap_omics <- lapply(1:nrow(data[[1]]), function (ii){ print(ii) print(ii) data_ii <- lapply(data, function(dd) dd[-ii, ]) remove_zero <- function (dat){ lapply(dat, function(dd){ idx <- which(colSums(dd)==0) if(length(idx)!=0){ return(dd[, -idx]) }else{ return(dd) } }) } data_filter <- data_ii %>% remove_zero data_filter_t <- data_filter data_filter_t[["meth"]] <- log2((data_filter[["meth"]]+0.0001)/(1-(data_filter[["meth"]]+0.0001)))%>% t %>% na.omit %>%t my_meth_results_2 <- data_filter_t %>% SolveInt(p=2, max.it=5, flavor_mod = "glmnet", init_flavor = "snf") })
count_1 <- lapply(bootstrap_omics, function (bb) which(colSums(abs(bb$H[[1]]))!=0) %>% names) count_2 <- lapply(bootstrap_omics, function (bb) which(colSums(abs(bb$H[[2]]))!=0) %>% names) count_3 <- lapply(bootstrap_omics, function (bb) which(colSums(abs(bb$H[[3]]))!=0) %>% names)
d1 <- data.frame(count_1 %>% unlist %>% t %>% table, dat="methylation") d2 <- data.frame(count_2 %>% unlist %>% t %>% table, dat="expression") d3 <- data.frame(count_3 %>% unlist %>% t %>% table, dat="proteins") d <- rbind(d1, d2, d3) g <- d%>% ggplot(aes(y=Freq, x=dat))+geom_violin( fill="#999999")+theme_bw()+xlab("")+ylab("Count") g <- g+theme(axis.text = element_text(size=15), axis.title.y = element_text(size=15)) g
#g %>% ggsave(filename="../../Figs/bootstrap_omicssimulation.pdf", width=6, height=4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.