setwd("/home/asya/Dropbox (Partners HealthCare)/code/ASE/plot_figures") library(tidyverse) library(cowplot) library(eulerr) library(gridExtra) source("../R/ASE_functions.R") source("../R/PerformAIAnalysis_CC.R") source("../R/DownstreamASE.R") source("../plot_figures/utilPlots.R") source("../plot_figures/takedata30mln.R") print("data loaded")
# MLN = 30 # # # Load X-mln data: # NEB_data = GetGatkPipelineTabs(paste0("../../../data/kidney/submln/", "MLN", MLN,"_SG", 1:6, "_N955_", "NEB", "_R1_merged_v2.mln", MLN,"_trial5_processed_gene_extended2.txt"), c(5,5,5,5,5,5), multiple = T) # SMART10ng_data = GetGatkPipelineTabs(paste0("../../../data/kidney/submln/", "MLN", MLN,"_SG", 7:12, "_N955_", "SMARTseq_10_ng", "_R1_merged_v2.mln", MLN,"_trial5_processed_gene_extended2.txt"), c(5,5,5,5,5,5), multiple = T) # SMART100pg_data = GetGatkPipelineTabs(paste0("../../../data/kidney/submln/", "MLN", MLN,"_SG", 1:6, "_N955_", "SMARTseq_100_pg", "_R1_merged_v2.mln", MLN,"_trial5_processed_gene_extended2.txt"), c(5,5,5,5,5,5), multiple = T) # # data = list(NEB_data, SMART10ng_data, SMART100pg_data) # rm(NEB_data) # rm(SMART10ng_data) # rm(SMART100pg_data) # # removeX <- function(DF, legitim_chrgenes){ # return(DF[DF$ensembl_gene_id %in% legitim_chrgenes$gene, ]) # } # chrgenes <- read.delim(paste0("../../../data/kidney/submln/MLN", # MLN, "_SG3_N955_NEB_R1_merged_v2.mln", # MLN, "_trial5_processed_gene_extended2.txt"))[, c("ensembl_gene_id", "chr")] # # data_30mln_noX = lapply(data, function(x){ # x[x$ensembl_gene_id %in% chrgenes[chrgenes$chr!="chrX" & chrgenes$chr!="chrY", "ensembl_gene_id"], ] # }) # # CC = lapply(c(paste0("../../../data/kidney/submln/NEB_CorrConsts_", MLN,"mln_1.05.RData"), # paste0("../../../data/kidney/submln/SMARTseq_10_ng_CorrConsts_", MLN,"mln_1.05.RData"), # paste0("../../../data/kidney/submln/SMARTseq_100_pg_CorrConsts_", MLN,"mln_1.05.RData")), # function(file){ # load(file) # sapply(out_XXmln_SMART10ng, function(x){x$fittedCC}) # })
reps_6_from30 = list(c(3,8,13,19,24,28), c(4,10,12,19,24,28), c(5,10,12,16,23,27)) # reps_6_from30 = list((1:5)*5+1, (0:5)*5+1, (0:5)*5+1) list_of_datas = data_30mln_noX; list_of_datas[[1]] = data_30mln_noX[[1]][, -c(2:11)] list_of_consts = list(CC[[1]], CC[[2]], CC[[3]]) list_of_constsM = list(mean(CC[[1]][6:15]), mean(CC[[2]]), mean(CC[[3]])) list_of_libprepnames = list("NEBNext (100ng)", "SMARTseq (10ng)", "SMARTseq (0.1ng)") list_of_Nreps = list(6,6,6)
res62_df <- lapply(1:3, function(m){ #for_6_df <- CountsToAI(list_of_datas[[m]], meth="mergedToProportion")$AI for_6_df <- CountsToAI(data_30mln_noX[[m]], reps_6_from30[[m]], meth="mergedToProportion")$AI df <- sapply(1:length(list_of_consts[[m]]), function(j){ pairs_sample = reps_6_from30[[m]][combn(list_of_Nreps[[m]], 2)[, j]] #pairs_sample = (combn(list_of_Nreps[[m]], 2)[, j]-1)*5 + sample(1:5, 2, replace=T) res62 <- PerformBinTestAIAnalysisForConditionNPointVect_knownCC(data_30mln_noX[[m]], #list_of_datas[[m]], vectReps=pairs_sample, vectRepsCombsCC = list_of_consts[[m]][j], ptVect = for_6_df, thr=10) FP_BTCC <- sum(res62$BT_CC, na.rm = T) FP_BT <- sum(res62$BT, na.rm = T) all_BTCC <- length(res62$BT_CC) - sum(is.na(res62$BT_CC)) all_BT <- length(res62$BT) - sum(is.na(res62$BT)) return(c(FP_BTCC, FP_BT, all_BTCC, all_BT)) }) df <- data.frame(t(df)) colnames(df) <- c("FP_BTCC", "FP_BT", "all_BTCC", "all_BT") df$c2_FP_BTCC_rate <- df$FP_BTCC / df$all_BTCC df$a2_FP_BT_rate <- df$FP_BT / df$all_BT print(df) return(df) }) res62_df_NEB <- res62_df[[1]] res62_df_NEB$experiment <- "NEBNext (100ng)" res62_df_SMART10 <- res62_df[[2]] res62_df_SMART10$experiment <- "SMARTseq (10ng)" res62_df_SMART100 <- res62_df[[3]] res62_df_SMART100$experiment <- "SMARTseq (0.1ng)" res62_df_all <- data.frame(rbind(res62_df_NEB, res62_df_SMART10, res62_df_SMART100)) res62_df_all <- res62_df_all[,c(7,5:6)] %>% gather(key="method", value = "FP_rate", -experiment)
res61_df <- lapply(1:3, function(m){ for_6_df <- CountsToAI(data_30mln_noX[[m]], reps_6_from30[[m]], meth="mergedToProportion")$AI df <- sapply(1:list_of_Nreps[[m]], function(j){ res61 <- PerformBinTestAIAnalysisForConditionNPointVect_knownCC(data_30mln_noX[[m]], #list_of_datas[[m]], vectReps=reps_6_from30[[m]][j], vectRepsCombsCC = list_of_consts[[m]][j], ptVect = for_6_df, thr=10) FP_BT1 <- sum(res61$BT, na.rm = T) all_BT1 <- length(res61$BT) - sum(is.na(res61$BT)) return(c(FP_BT1, all_BT1)) }) df <- data.frame(t(df)) colnames(df) <- c("FP_BT1", "all_BT1") df$a1_FP_BT1_rate <- df$FP_BT1 / df$all_BT1 print(df) return(df) }) res61_df_NEB <- res61_df[[1]] res61_df_NEB$experiment <- "NEBNext (100ng)" res61_df_SMART10 <- res61_df[[2]] res61_df_SMART10$experiment <- "SMARTseq (10ng)" res61_df_SMART100 <- res61_df[[3]] res61_df_SMART100$experiment <- "SMARTseq (0.1ng)" res61_df_all <- data.frame(rbind(res61_df_NEB, res61_df_SMART10, res61_df_SMART100)) res61_df_all <- res61_df_all[,c(4,3)] %>% gather(key="method", value = "FP_rate", -experiment)
plt4B <- ggplot(rbind(res62_df_all,res61_df_all), aes(x=method, y=FP_rate, col=experiment)) + geom_boxplot() + xlab("") + ylab("False positive rate") + theme_bw() + theme(legend.position="right", text = element_text(size=18)) + #scale_color_manual(labels=methods, # values=wes_palette(n=3, name="FantasticFox1")) scale_x_discrete(labels=c("1 replicate,\nno correction", "2 replicates,\nno correction", "2 replicetes,\nQCC correction")) + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) plt4B
res621 =rbind(res62_df_all,res61_df_all) res621$method = as.factor(res621$method) levels(res621$method) = c("1 replicate,\nno correction", "2 replicates,\nno correction", "2 replicates,\nQCC correction") ggplot(res621, aes(x=experiment, y=FP_rate, col=experiment)) + geom_boxplot(lwd = 1.1) + facet_grid(~method) + xlab("") + ylab("False positive rate") + theme_bw() + theme(legend.position="None", text = element_text(size=28)) + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + scale_x_discrete(labels=c("Exp 1", "Exp 2", "Exp 3"))
data1c = sapply(reps_6_from30[[1]][2:6], function(i){rowSums(data_30mln_noX[[1]][, c(i*2-1,i*2)])}) data2c = sapply(reps_6_from30[[2]][1:6], function(i){rowSums(data_30mln_noX[[2]][, c(i*2-1,i*2)])}) data3c = sapply(reps_6_from30[[3]][1:6], function(i){rowSums(data_30mln_noX[[3]][, c(i*2-1,i*2)])}) dataC = data.frame(id = data_30mln_noX[[1]]$ensembl_gene_id, mc1 = rowMeans(data1c), sdc1 = apply(data1c, 1, sd), vc1 = apply(data1c, 1, var), mc2 = rowMeans(data2c), sdc2 = apply(data2c, 1, sd), vc2 = apply(data2c, 1, var), mc3 = rowMeans(data3c), sdc3 = apply(data3c, 1, sd), vc3 = apply(data3c, 1, var)) dataCext1 = data.frame(id = data_30mln_noX[[1]]$ensembl_gene_id, mc = rowMeans(data1c), sdc = apply(data1c, 1, sd), vc = apply(data1c, 1, var), who = list_of_libprepnames[[1]], what = " original") dataCext2 = data.frame(id = data_30mln_noX[[1]]$ensembl_gene_id, mc = rowMeans(data2c), sdc = apply(data2c, 1, sd), vc = apply(data2c, 1, var), who = list_of_libprepnames[[2]], what = " original") dataCext3 = data.frame(id = data_30mln_noX[[1]]$ensembl_gene_id, mc = rowMeans(data3c), sdc = apply(data3c, 1, sd), vc = apply(data3c, 1, var), who = list_of_libprepnames[[3]], what = " original") dataCext1acc = dataCext1 dataCext1acc$vc = dataCext1acc$vc/(list_of_constsM[[1]])**2 dataCext1acc$what = "divided by QCC^2" dataCext2acc = dataCext2 dataCext2acc$vc = dataCext2acc$vc/(list_of_constsM[[2]])**2 dataCext2acc$what = "divided by QCC^2" dataCext3acc = dataCext3 dataCext3acc$vc = dataCext3acc$vc/(list_of_constsM[[3]])**2 dataCext3acc$what = "divided by QCC^2" dataCext = rbind(dataCext1, dataCext2, dataCext3) dataCextacc = rbind(dataCext1acc, dataCext2acc, dataCext3acc) dataCexlALL = rbind(dataCext, dataCextacc) dataCexlALLlogG1 = dataCexlALL[dataCexlALL$mc>=1, ] dataCexlALLlogG1[, 2:4] = log(dataCexlALLlogG1[, 2:4], base=10) dataCexlALLlogG10 = dataCexlALL[dataCexlALL$mc>=10, ] dataCexlALLlogG10[, 2:4] = log(dataCexlALLlogG10[, 2:4], base=10)
plt4D <- ggplot() + geom_point(data=dataCexlALLlogG1, aes(mc, vc, col = who), size=0.7, alpha=0.4) + geom_line(data=data.frame(a=c(0,7.5), b=c(0,7.5)), aes(a,b), linetype="dashed") + scale_y_continuous(breaks=c(0:7), labels=10**c(0:7)) + scale_x_continuous(breaks=c(0:7), labels=10**c(0:7)) + facet_grid(~ what) + theme_bw() + stat_smooth(data=dataCexlALLlogG1, aes(mc, vc, group=who), col="black", method = "lm", formula = (y ~ offset(x)))+ scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + ylab("Dispersion of gene coverages") + xlab("Mean gene coverage") + theme(legend.position="None", text = element_text(size=28), axis.text.x = element_text(angle = 90)) plt4D
slopesG1 = sapply(unique(dataCexlALLlogG1$what), function(what){ sapply(unique(dataCexlALLlogG1$who), function(who){ sqrt(10**(lm(dataCexlALLlogG1[dataCexlALLlogG1$who==who & dataCexlALLlogG1$what==what & dataCexlALLlogG1$vc!= -Inf, ], formula = (vc ~ offset(mc)))$coefficients)) }) }) slopesG10 = sapply(unique(dataCexlALLlogG10$what), function(what){ sapply(unique(dataCexlALLlogG10$who), function(who){ sqrt(10**lm(dataCexlALLlogG10[dataCexlALLlogG10$who==who & dataCexlALLlogG10$what==what, ], formula = (vc ~ offset(mc)))$coefficients) }) }) slopesG1 slopesG10
ggplot() + geom_point(data=dataCexlALL[dataCexlALL$mc>=1, ], aes(mc, vc, col = who), size=0.7, alpha=0.4) + geom_line(data=data.frame(a=10**c(0,7.5), b=10**c(0,7.5)), aes(a,b), linetype="dashed") + scale_y_log10() + scale_x_log10() + facet_grid(~ what) + theme_bw() + stat_smooth(data=dataCexlALL[dataCexlALL$mc>=1, ], aes(mc, vc, group=who), col="black", method = "lm", formula = (y ~ offset(x)))+ scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + ylab("Dispersion of gene coverages") + xlab("Mean gene coverage") + theme(legend.position="None", text = element_text(size=28), axis.text.x = element_text(angle = 90))
sapply(unique(dataCexlALL[dataCexlALL$mc>=1, ]$what), function(what){ sapply(unique(dataCexlALL[dataCexlALL$mc>=1, ]$who), function(who){ sqrt( lm(dataCexlALL[dataCexlALL$mc>=1, ][dataCexlALL[dataCexlALL$mc>=1, ]$who==who & dataCexlALL[dataCexlALL$mc>=1, ]$what==what, ], formula = (vc ~ 0+mc))$coefficients ) }) }) #Ээээээ
CC
CC_ov_df = data.frame(QCC = unlist(CC)[6:45], overdispersion_sqr = rep(slopesG10[,1], each=15)[6:45], method = rep(unlist(list_of_libprepnames), each=15)[6:45]) gg4c = ggplot() + geom_boxplot(data=CC_ov_df, aes(overdispersion_sqr, QCC, col=method)) + geom_point(data=CC_ov_df, aes(overdispersion_sqr, QCC, col=method)) + theme_bw() + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + ylim(1,max(CC_ov_df$QCC)*1.1) + xlim(1,max(CC_ov_df$QCC)*1.1) + xlab("Sqrt of overdispersion") + theme(legend.position= "None", text = element_text(size=28)) gg4c
libnreps = c(5,6,6) reps_N_from30 = list(reps_6_from30[[1]][2:6], reps_6_from30[[2]], reps_6_from30[[3]]) sdPairs = lapply(1:3, function(l){ combn(1:libnreps[[l]], 2, function(x){ df = data_30mln_noX[[l]][, sort(c(1, reps_N_from30[[l]][x]*2, reps_N_from30[[l]][x]*2+1))] var_cov = sapply(1:nrow(df), function(i){var(c(df[i,2]+df[i,3], df[i,4]+df[i,5]))}) cov = MeanCoverage(df)$meanCOV*2 df_log = data.frame(cov_log=log(cov, base=10), var_cov_log=log(var_cov, base=10)) df_log = df_log[!is.infinite(df_log$cov_log) & !is.infinite(df_log$var_cov_log),] plot(df_log) sqrt_log_intercept = sqrt(10**lm(df_log, formula = (var_cov_log ~ offset(cov_log)))$coefficients) print(sqrt_log_intercept) return(sqrt_log_intercept) }) }) sdPairs
CC_ov_sdp_df = data.frame(QCC = unlist(CC)[6:45], sd_pair = unlist(sdPairs), overdispersion_sqr = rep(slopesG10[,1], each=15)[6:45], method = rep(unlist(list_of_libprepnames), each=15)[6:45]) plt_ov_qcc = ggplot() + geom_boxplot(data=CC_ov_sdp_df, aes(overdispersion_sqr, QCC, col=method)) + geom_point(data=CC_ov_sdp_df, aes(overdispersion_sqr, QCC, col=method)) + theme_bw() + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + ylim(0,3) + xlim(0,3) + xlab("Sqrt of overdispersion") + theme(legend.position= "None", text = element_text(size=28)) plt_sdp_qcc = ggplot() + geom_boxplot(data=CC_ov_sdp_df, aes(sd_pair, QCC, col=method)) + geom_point(data=CC_ov_sdp_df, aes(sd_pair, QCC, col=method)) + theme_bw() + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + ylim(0,3) + xlim(0,3) + xlab("Counts sd") + theme(legend.position= "None", text = element_text(size=28)) plt_sdp_ov = ggplot() + geom_boxplot(data=CC_ov_sdp_df, aes(sd_pair, overdispersion_sqr, col=method)) + geom_point(data=CC_ov_sdp_df, aes(sd_pair, overdispersion_sqr, col=method)) + theme_bw() + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + ylim(0,3) + xlim(0,3) + xlab("Counts sd") + ylab("Sqrt of overdispersion")+ theme(legend.position= "None", text = element_text(size=28)) cowplot::plot_grid(plt_ov_qcc, plt_sdp_qcc, plt_sdp_ov, ncol=3)
grid.arrange( ggplot() + geom_boxplot(data=CC_ov_sdp_df, aes(overdispersion_sqr, QCC, col=method)) + geom_point(data=CC_ov_sdp_df, aes(overdispersion_sqr, QCC, col=method)) + theme_bw() + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + xlab("Sqrt of overdispersion") + theme(legend.position= "None", text = element_text(size=28)) , ggplot() + geom_point(data=CC_ov_sdp_df, aes(sd_pair, QCC, col=method)) + theme_bw() + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + xlab("Counts sd") + theme(legend.position= "None", text = element_text(size=28)) , ggplot() + geom_boxplot(data=CC_ov_sdp_df, aes(overdispersion_sqr, sd_pair, col=method)) + geom_point(data=CC_ov_sdp_df, aes(overdispersion_sqr, sd_pair, col=method)) + theme_bw() + scale_color_manual(values=c("royalblue1","olivedrab3","maroon2")) + ylab("Counts sd") + xlab("Sqrt of overdispersion")+ theme(legend.position= "None", text = element_text(size=28)) , ncol=3 )
df_2exp = lapply(1:3, function(i){ data.frame(method = list_of_libprepnames[[i]], t(combn(reps_6_from30[[i]],2)), CC = CC[[i]]) }) df_2exp
df_1exp = lapply(1:3, function(i){ do.call(rbind, lapply(1:ncol(combn(reps_6_from30[[i]],2)), function(j){ pair1 = combn(reps_6_from30[[i]],2)[,j] rest1 = reps_6_from30[[i]][! reps_6_from30[[i]] %in% pair1] df = data.frame(X1=rep(pair1[1], each=ncol(combn(rest1, 2))), X2=rep(pair1[2], each=ncol(combn(rest1, 2))), X3=(combn(rest1, 2))[1,], X4=(combn(rest1, 2))[2,], CC_12 = CC[[i]][j]) df$CC_34 = sapply(1:nrow(df), function(k){ df_2exp[[i]][df_2exp[[i]]$X1==df[k, ]$X3 & df_2exp[[i]]$X2==df[k, ]$X4, ]$CC }) df })) }) df_1exp
set.seed(3); df_1exp_15 = lapply(df_1exp, function(x){x[sample(nrow(x),15), ]}) set.seed(4); df_2exp_15 = lapply(1:ncol(combn(1:3,2)), function(c){ m1 = combn(1:3,2)[1,c] m2 = combn(1:3,2)[2,c] data.frame(df_2exp[[m1]][sample(1:nrow(df_2exp[[m1]]),15), ], df_2exp[[m2]][sample(1:nrow(df_2exp[[m2]]),15), ]) }) df_1exp_15 df_2exp_15
interrep_exp_diff = lapply(1:3, function(df_1exp_n){ lapply(1:15, function(i){ print(paste(df_1exp_n, i)) data1 = df_1exp_15[[df_1exp_n]][i, ] df = data_30mln_noX[[df_1exp_n]][, c(1, sort(c(data1$X1*2, data1$X1*2+1, data1$X2*2, data1$X2*2+1)), sort(c(data1$X3*2, data1$X3*2+1, data1$X4*2, data1$X4*2+1)))] PerformBinTestAIAnalysisForTwoConditions_knownCC(df, vect1CondReps=1:2, vect2CondReps=3:4, vect1CondRepsCombsCC=data1$CC_12, vect2CondRepsCombsCC=data1$CC_34, Q=0.95, thr=10, thrUP=NA, thrType="each", minDifference=NA) }) })
betweenexp_diff = lapply(1:3, function(df_2exp_n){ m1 = combn(1:3, 2)[1, df_2exp_n] m2 = combn(1:3, 2)[2, df_2exp_n] lapply(1:15, function(i){ print(paste(df_2exp_n, i)) data2 = df_2exp_15[[df_2exp_n]][i, ] df1 = data_30mln_noX[[m1]][, c(1, sort(c(data2$X1*2, data2$X1*2+1, data2$X2*2, data2$X2*2+1)))] df2 = data_30mln_noX[[m2]][, c(1, sort(c(data2$X1.1*2, data2$X1.1*2+1, data2$X2.1*2, data2$X2.1*2+1)))] df = merge(df1, df2, by="ensembl_gene_id") head(df) PerformBinTestAIAnalysisForTwoConditions_knownCC(df, vect1CondReps=1:2, vect2CondReps=3:4, vect1CondRepsCombsCC=data2$CC, vect2CondRepsCombsCC=data2$CC.1, Q=0.95, thr=10, thrUP=NA, thrType="each", minDifference=NA) }) }) betweenexp_diff[[1]]
interrep_exp_diff_list = lapply(interrep_exp_diff, function(m_list){ do.call(rbind, lapply(1:length(m_list), function(x){ df = na.omit(m_list[[x]][, c("BT","BT_CC")]) data.frame( BT_count = sum(df[, 1]), BTCC_count = sum(df[, 2]), count = nrow(df), BT_p = sum(df[, 1])/nrow(df), BTCC_p = sum(df[, 2])/nrow(df) ) })) }) interrep_exp_diff_df = do.call(rbind, lapply(1:3, function(i){ rbind( data.frame(method = list_of_libprepnames[[i]], DiffASE_proportion = interrep_exp_diff_list[[i]]$BT_p, DiffASE_N = interrep_exp_diff_list[[i]]$BT_count, test = "BT", what = "1 Experiment"), data.frame(method = list_of_libprepnames[[i]], DiffASE_proportion = interrep_exp_diff_list[[i]]$BTCC_p, DiffASE_N = interrep_exp_diff_list[[i]]$BTCC_count, test = "BTCC", what = "1 Experiment") ) })) ggplot(interrep_exp_diff_df, aes(test, DiffASE_proportion, col=method)) + geom_boxplot() + theme_bw() + scale_color_manual(values=c("royalblue1","maroon2","olivedrab3")) + facet_grid(~ what)
betweenexp_diff_list = lapply(betweenexp_diff, function(m_list){ do.call(rbind, lapply(1:length(m_list), function(x){ df = na.omit(m_list[[x]][, c("BT","BT_CC")]) data.frame( BT_count = sum(df[, 1]), BTCC_count = sum(df[, 2]), count = nrow(df), BT_p = sum(df[, 1])/nrow(df), BTCC_p = sum(df[, 2])/nrow(df) ) })) }) betweenexp_exp_diff_df = do.call(rbind, lapply(1:3, function(i){ mm = combn(1:3, 2)[,i] rbind( data.frame(method = paste(list_of_libprepnames[[mm[1]]], "vs", list_of_libprepnames[[mm[2]]]), DiffASE_proportion = betweenexp_diff_list[[i]]$BT_p, DiffASE_N = betweenexp_diff_list[[i]]$BT_count, test = "BT", what = "2 Experiments"), data.frame(method = paste(list_of_libprepnames[[mm[1]]], "vs", list_of_libprepnames[[mm[2]]]), DiffASE_proportion = betweenexp_diff_list[[i]]$BTCC_p, DiffASE_N = betweenexp_diff_list[[i]]$BTCC_count, test = "BTCC", what = "2 Experiments") ) })) ggplot(betweenexp_exp_diff_df, aes(test, DiffASE_proportion, col=method)) + geom_boxplot() + theme_bw() + facet_grid(~ what)
ggplot(rbind(betweenexp_exp_diff_df, interrep_exp_diff_df), aes(test, DiffASE_proportion, col=method)) + geom_boxplot() + theme_bw() + facet_grid(~ what) + scale_color_manual(values=c("orange","red","blue", "royalblue1","maroon2","olivedrab3")) + ylab("Proportion of genes called differential") + xlab("")
ggplot(rbind(betweenexp_exp_diff_df, interrep_exp_diff_df), aes(method, DiffASE_proportion, col=method)) + geom_boxplot() + theme_bw() + facet_grid(~ test) + scale_color_manual(values=c("orange","red","blue", "royalblue1","maroon2","olivedrab3")) + ylab("Proportion of genes called differential") + xlab("")
#pdf("/home/asya/inter_extra_BT_BTCC.pdf", height = 4, width = 10) ggplot(rbind(betweenexp_exp_diff_df, interrep_exp_diff_df), aes(method, DiffASE_N, col=method)) + geom_boxplot() + theme_bw() + facet_grid(~ test) + scale_color_manual(values=c("orange","red","blue", "royalblue1","maroon2","olivedrab3")) + ylab("Number of genes called differential") + xlab("") #dev.off()
diff_df = rbind(betweenexp_exp_diff_df, interrep_exp_diff_df) diff_df$method = as.factor(diff_df$method) diff_df$method1 = as.factor(diff_df$method) diff_df$method2 = as.factor(diff_df$method) levels(diff_df$method1) = sapply(levels(diff_df$method), function(x){unlist(strsplit(x, " vs "))[1]}) levels(diff_df$method2) = sapply(levels(diff_df$method), function(x){y=unlist(strsplit(x, " vs ")); y[length(y)]}) ggplot(diff_df, aes(test, DiffASE_N, col=method)) + geom_boxplot() + theme_bw() + facet_grid(method1 ~ method2) + scale_color_manual(values=c("orange","red","blue", "royalblue1","maroon2","olivedrab3")) + ylab("Number of genes called differential") + xlab("")
Looks not ideal, previous -- better.
diff_df$test = as.factor(diff_df$test) levels(diff_df$test) = c("binomial", "corrected") ggplot(diff_df, aes(method, DiffASE_N, col=method)) + geom_boxplot(lwd=1.1) + theme_bw() + facet_grid(~ test) + scale_color_manual(values=c("orange","red","green", "royalblue1","maroon2","olivedrab3")) + labs(col = "Library preparation method:") + ylab("Number of genes called differential") + xlab("") + theme(legend.position= "None", text = element_text(size=28), axis.text.x=element_blank()) #theme(legend.position= c(0.78, 0.82), text = element_text(size=28), axis.text.x=element_blank()) #dev.off()
betweenexp_diff_list interrep_exp_diff_list
lapply(df_1exp_15, function(x){table(x[, 1:2])}) lapply(df_1exp_15, function(x){table(x[, 3:4])})
dfCC = data.frame( QCC = unlist(CC), experiment = rep(c("Exp 1: NEBNext (100ng)", "Exp 2: SMARTseq (10ng)", "Exp 3: SMARTseq (0.1ng)"), each=15) ) ggplot(dfCC, aes(experiment, QCC, col=experiment)) + geom_jitter(size=4) + ylim(1, 3) + xlab("") + ylab("Quality Correction Constant") + theme_bw() + theme(legend.position="None", text = element_text(size=28)) + labs(col = "Library preparation method:") + scale_x_discrete(labels=c("Exp 1", "Exp 2", "Exp 3")) + scale_color_manual(values=c("royalblue1","maroon2","olivedrab3"))
figure_3E_c <- ggplot(df, aes(x=libprep, y=Pc, col=libprep)) + geom_boxplot() + geom_point() + facet_grid(~what) + xlab("") + ylab("Concordance % \nbetween two replicates") + theme_bw() + theme(legend.position="bottom", text = element_text(size=28)) + guides(col=guide_legend(title="", nrow = 1)) + scale_color_manual(labels=methods, values=c("royalblue1","maroon2","olivedrab3")) + #scale_color_manual(values=c("#999999", "#66FFB2"), labels=c("no correction", "QCC correction")) + scale_x_discrete(labels=c("", "", ""), limits = rev(levels(df$libprep))) #+ #coord_flip() figure_3E_d <- ggplot(df, aes(x=libprep, y=Pd, col=libprep)) + geom_boxplot() + geom_point() + facet_grid(~what) + xlab("") + ylab("DDR % \nbetween two replicates") + theme_bw() + theme(legend.position="bottom", text = element_text(size=28)) + guides(col=guide_legend(title="", nrow = 1)) + scale_color_manual(labels=methods, values=c("royalblue1","maroon2","olivedrab3")) + #scale_color_manual(values=c("#999999", "#66FFB2"), labels=c("no correction", "QCC correction")) + scale_x_discrete(labels=c("", "", ""), limits = rev(levels(df$libprep))) #+ #coord_flip() cowplot::plot_grid(figure_3E_d, figure_3E_c)
dfCC = rbind( data.frame(t(combn(6,2)), QCC = CC[[1]], exp = "NEBNext (100ng)"), data.frame(t(combn(6,2))[, 2:1], QCC = CC[[1]], exp = "NEBNext (100ng)"), data.frame(t(combn(6,2)), QCC = CC[[2]], exp = "SMARTseq (10ng)"), data.frame(t(combn(6,2))[, 2:1], QCC = CC[[2]], exp = "SMARTseq (10ng)"), data.frame(t(combn(6,2)), QCC = CC[[3]], exp = "SMARTseq (0.1ng)"), data.frame(t(combn(6,2))[, 2:1], QCC = CC[[3]], exp = "SMARTseq (0.1ng)") ) ggplot(dfCC, aes(X1, X2)) + facet_grid(~ exp) + geom_tile(aes(fill = QCC))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.