knitr::opts_chunk$set(echo = TRUE)

We perform bootstrap to assess the power of the variable selection.

Useful packages

library(Matrix)
library(tidyverse)
library(PintMF)
library(CrIMMix)
library(future)
library(mclust)

Simulate data with CrIMMix package

Four un balanced groups are simulated.

nclust <- 4
nByclust= c(5, 10, 25, 10)
set.seed(55)
c_1 <- simulateY(nclust = nclust,  n_byClust = nByclust, J=1000, prop=0.05, noise=0.2)
c_2 <- simulateY(nclust = nclust,  n_byClust = nByclust, J=500, flavor="binary",
                 params=list(c(p=0.6)), prop=0.01, noise=0.1)

params_beta <- list(c(mean1=-1, mean2=1, sd1=0.5, sd2=0.5))

c_3 <- simulateY(nclust = nclust,  n_byClust = nByclust, J=5000,flavor="beta", 
                 params=params_beta, prop=0.02, noise=0.1)
data <- list(c_1$data, c_2$data, c_3$data)

bootstrap <- lapply(1:sum(nByclust), function (ii){
  data_ii <- lapply(data, function(dd) dd[-ii, ])
  true.clust <- c_1$true.clusters
  data_names <- c("gaussian", "binary", "beta-like")
  names(data_ii) <- data_names
  data_t <- data_ii
  data_t[[3]] <- log2(data_ii[[3]]/(1-data_ii[[3]]))
  R_snf <- SolveInt(Y=data_t, p=4, max.it=5, verbose=FALSE, init_flavor="snf", flavor_mod="glmnet")
})
count_1 <- lapply(bootstrap, function (bb)  which(colSums(abs(bb$H[[1]]))!=0) %>% names)
count_2 <- lapply(bootstrap, function (bb)  which(colSums(abs(bb$H[[2]]))!=0) %>% names)
count_3 <- lapply(bootstrap, function (bb)  which(colSums(abs(bb$H[[3]]))!=0) %>% names)
d1 <- data.frame(count_1 %>% unlist %>% t %>% table, dat="gaussian")
d2 <- data.frame(count_2 %>% unlist %>% t %>% table, dat="binary")
d3 <- data.frame(count_3 %>% unlist %>% t %>% table, dat="beta-like")

d <- rbind(d1, d2, d3)
g <- d%>% ggplot(aes(y=Freq, x=dat))+geom_violin( fill="#999999")+theme_bw()+xlab("")+ylab("Count")

g <- g+theme(axis.text = element_text(size=15), axis.title.y = element_text(size=15))
g
g %>% ggsave(filename="~/Figs_pintMF/bootstrap_simulation.eps", width=6, height=4)
setwd('../')
library("parallel")
library(PintMF)
library(future.apply)
library(dplyr)
source("inst/Simulations_OMICSSMILA/00.setup.R")
k=1 
data <- file[[k]][1:3]

bootstrap_omics <- lapply(1:nrow(data[[1]]), function (ii){
  print(ii)
  print(ii)
  data_ii <- lapply(data, function(dd) dd[-ii, ])
  remove_zero <- function (dat){
    lapply(dat, function(dd){
      idx <- which(colSums(dd)==0)
      if(length(idx)!=0){
        return(dd[, -idx])
      }else{
        return(dd)
      }
    })
  }
  data_filter <- data_ii %>% remove_zero
  data_filter_t <- data_filter
  data_filter_t[["meth"]] <- log2((data_filter[["meth"]]+0.0001)/(1-(data_filter[["meth"]]+0.0001)))%>% t %>% na.omit %>%t


my_meth_results_2 <- data_filter_t %>% SolveInt(p=2, max.it=5, flavor_mod = "glmnet", init_flavor = "snf")
})
count_1 <- lapply(bootstrap_omics, function (bb)  which(colSums(abs(bb$H[[1]]))!=0) %>% names)

count_2 <- lapply(bootstrap_omics, function (bb)  which(colSums(abs(bb$H[[2]]))!=0) %>% names)

count_3 <- lapply(bootstrap_omics, function (bb)  which(colSums(abs(bb$H[[3]]))!=0) %>% names)
d1 <- data.frame(count_1 %>% unlist %>% t %>% table, dat="methylation")
d2 <- data.frame(count_2 %>% unlist %>% t %>% table, dat="expression")
d3 <- data.frame(count_3 %>% unlist %>% t %>% table, dat="proteins")

d <- rbind(d1, d2, d3)
g <- d%>% ggplot(aes(y=Freq, x=dat))+geom_violin( fill="#999999")+theme_bw()+xlab("")+ylab("Count")
g <- g+theme(axis.text = element_text(size=15), axis.title.y = element_text(size=15))
g
#g %>% ggsave(filename="../../Figs/bootstrap_omicssimulation.pdf", width=6, height=4)


CNRGH/pintmf documentation built on Feb. 23, 2022, 12:02 a.m.