#### feature selection comparison on Benchmark dataset 1
library(methylDeConv)
####################################
#### Benchmark dataset 1
####################################
library(ExperimentHub)
hub <- ExperimentHub()
query(hub, "FlowSorted.Blood.EPIC")
FlowSorted.Blood.EPIC <- hub[["EH1136"]]
annot <- as.data.frame(colData(FlowSorted.Blood.EPIC))
benchmark <- which(annot$CellType == "MIX")
tmp <- getBeta(preprocessNoob(FlowSorted.Blood.EPIC, dyeMethod = "single"))
benchmark_betamatrix <- tmp[,rownames(annot)[benchmark]]
benchmark_trueprop <- annot[benchmark, c("Bcell", "CD4T", "CD8T", "Mono", "Neu", "NK")]
####################################
#### build reference
####################################
reference_EPIC <- build_reference_EPIC(extend = FALSE)
compTable_EPIC <- ref_compTable(reference_EPIC$ref_betamatrix, reference_EPIC$ref_phenotype)
compTable_EPIC <- compTable_EPIC[,3:8]
save("compTable_EPIC", "benchmark_betamatrix", "benchmark_trueprop", file = "Benchmark_EPICref.RData")
############################
# ARIC performance on Benchmark dataset
############################
### process Deconv files generated by ARIC.
deconv_ARIC <- list()
for (i in 1:9){
file_name <- paste0(paste0("./Downloads/deconv_ARIC_",i),".csv")
dat <- read.csv(file_name, header = T)
rownames(dat) <- dat[,"cell.types"]
dat <- dat[, 2:13]
dat <- t(dat)
deconv_ARIC[[i]] <- dat
}
within_sample_corr(benchmark_trueprop, deconv_ARIC[[1]]) #0.9926955
within_sample_corr(benchmark_trueprop, deconv_ARIC[[2]]) #0.987864
within_sample_corr(benchmark_trueprop, deconv_ARIC[[3]]) #0.9484381
within_sample_corr(benchmark_trueprop, deconv_ARIC[[4]]) #0.9879336
within_sample_corr(benchmark_trueprop, deconv_ARIC[[5]]) #0.9711626
within_sample_corr(benchmark_trueprop, deconv_ARIC[[6]]) #0.9688859
within_sample_corr(benchmark_trueprop, deconv_ARIC[[7]]) #0.8315977
within_sample_corr(benchmark_trueprop, deconv_ARIC[[8]]) #0.9391229
#within_sample_corr(benchmark_trueprop, deconv_ARIC[[9]]) #0.4624484
within_sample_corr_sd <- function(true_proportions, deconv_res){
corr <- rep(NA, 12)
for (i in 1:12){
corr[i] <- cor(as.numeric(true_proportions[i,c("Bcell", "CD4T","CD8T","Mono","Neu","NK")]),
as.numeric(deconv_res[i,c("Bcell", "CD4T","CD8T","Mono","Neu","NK")]), method = "spearman")
}
print(sd(corr))
return(sd(corr))
}
within_sample_corr_sd(benchmark_trueprop, deconv_ARIC[[1]]) #0.01716289
within_sample_corr_sd(benchmark_trueprop, deconv_ARIC[[2]]) #0.02548252
within_sample_corr_sd(benchmark_trueprop, deconv_ARIC[[3]]) #0.08853548
within_sample_corr_sd(benchmark_trueprop, deconv_ARIC[[4]]) #0.02215361
within_sample_corr_sd(benchmark_trueprop, deconv_ARIC[[5]]) #0.03537208
within_sample_corr_sd(benchmark_trueprop, deconv_ARIC[[6]]) #0.05061605
within_sample_corr_sd(benchmark_trueprop, deconv_ARIC[[7]]) #0.1592075
within_sample_corr_sd(benchmark_trueprop, deconv_ARIC[[8]]) #0.09017252
library(tidyr)
library(ggplot2)
setwd("./Downloads/")
pdf(file = "Figure1.pdf",
width = 16,
height = 8)
cor_Houseman <- c(0.978, 0.969, 0.950, 0.966, 0.947, 0.947, 0.831, 0.964)
cor_RPC <- c(0.978, 0.978, 0.828, 0.983, 0.952, 0.938, 0.800, 0.959)
cor_CBS <- c(0.956, 0.962, 0.839, 0.952, 0.942, 0.950, 0.846, 0.952)
cor_MethylResolver <- c(0.978, 0.983, 0.868, 0.978, 0.956, 0.920, 0.788, 0.955)
cor_ARIC <- c(0.993, 0.988, 0.948, 0.988, 0.971, 0.969, 0.831, 0.939)
df1 <- data.frame(FeatureSelection = c("oneVsAllttest","oneVsAllLimma","pairwiseLimma","pairwiseGlmnet","multiGlmnet",
"glmnetpreselect", "RFpreselect", "Rfepreselect"),
Houseman = cor_Houseman, RPC = cor_RPC, CBS = cor_CBS, MethylResolver = cor_MethylResolver, ARIC = cor_ARIC)
sd_1 <- c(0.0299, 0.0521, 0.0530, 0.0548, 0.0662, 0.0658, 0.1200, 0.0516,
0.0299, 0.0299, 0.1766, 0.0252, 0.0679, 0.0743, 0.1670, 0.0659,
0.0639, 0.0471, 0.1831, 0.0501, 0.0812, 0.0470, 0.1028, 0.0501,
0.0299, 0.0282, 0.1804, 0.0299, 0.0692, 0.1005, 0.1517, 0.0648,
0.0172, 0.0255, 0.0885, 0.0222, 0.0354, 0.0506, 0.1592, 0.0902)
se = sd_1/sqrt(12)
print(ggplot(data = df1 %>% gather(Deconvolution, Spearman_Correlation, -FeatureSelection),
aes(x = factor(FeatureSelection, level = c("oneVsAllttest","oneVsAllLimma","pairwiseLimma","pairwiseGlmnet","multiGlmnet",
"glmnetpreselect", "RFpreselect", "Rfepreselect")), y = Spearman_Correlation, fill = Deconvolution)) +
geom_bar(stat = 'identity', position = 'dodge')+
#geom_text(aes(label= round(Spearman_Correlation,2)), position = position_dodge(0.9))+
geom_errorbar(aes(ymin=Spearman_Correlation-se, ymax=Spearman_Correlation+se), width=.5, position=position_dodge(.9)) +
labs(x = "Feature selection")+
labs(y = "Average Spearman correlation")+
labs(fill = "Deconvolution algorithms")+
coord_cartesian(ylim=c(0.5,1)))
dev.off()
####################################
####################################
#### Simulation datasets
####################################
####################################
##################################
## prepare data for ARIC algorithm
##################################
#### get ARIC deconvolution results for simulated datasets with EPIC reference library
load("Benchmark1_featureSelection.rda")
probes <- list()
probes[[1]] <- probe_1
probes[[2]] <- probe_2
probes[[3]] <- probe_3
probes[[4]] <- probe_4[-1]
probes[[5]] <- probe_5[-1]
probes[[6]] <- probe_6[-1]
probes[[7]] <- probe_7
probes[[8]] <- probe_8
probes[[9]] <- probe_9
## 10 simulated datasets 5 for beta mixture, 5 for gaussian mixture
file_names <- c("beta_sim_data_nonimmune_level1", "beta_sim_data_nonimmune_level2",
"beta_sim_data_nonimmune_level3", "beta_sim_data_nonimmune_level4",
"beta_sim_data_nonimmune_level5",
"gaussian_sim_data_nonimmune_level1", "gaussian_sim_data_nonimmune_level2",
"gaussian_sim_data_nonimmune_level3", "gaussian_sim_data_nonimmune_level4",
"gaussian_sim_data_nonimmune_level5")
for (i in 1:10){
load(paste0(file_names[i],".RData"))
for (j in 1:9){
write.csv(sim_data$mixture_sim_mat[probes[[j]],],
file = paste0(paste0(file_names[i],paste0("_probe_",j)), ".csv"),
row.names = TRUE)
}
}
for (i in 1:9){
write.csv(compTable_EPIC[probes[[i]],],
file = paste0(paste0("compTable_EPIC_probe_",i), ".csv"), row.names = TRUE)
}
##################################
## Run ARIC algorithm
##################################
``` {python}
file_names = ["beta_sim_data_nonimmune_level1", "beta_sim_data_nonimmune_level2",
"beta_sim_data_nonimmune_level3", "beta_sim_data_nonimmune_level4",
"beta_sim_data_nonimmune_level5", "gaussian_sim_data_nonimmune_level1",
"gaussian_sim_data_nonimmune_level2", "gaussian_sim_data_nonimmune_level3",
"gaussian_sim_data_nonimmune_level4", "gaussian_sim_data_nonimmune_level5"]
from ARIC import *
for i in range(10):
for j in range(9):
mixture_file = file_names[i] + "_probe_" + str(j+1) + ".csv"
ref_file = "compTable_EPIC_probe_" + str(j+1) + ".csv"
output_file = file_names[i] + "_probe_" + str(j+1) + "_deconv_ARIC.csv"
ARIC(mix_path = mixture_file, ref_path = ref_file, save_path = output_file, selected_marker = False)
```
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.