library("tidyverse")
library(ggpubr)
library(ggrepel)

source(file.path(".","..","R","ASE_functions.R"))
source(file.path(".","..","R","PerformDiffAIAnalysisFor2Conditions.R"))
removeX <- function(DF, legitim_chrgenes){
  return(DF[DF$ensembl_gene_id %in% legitim_chrgenes$gene, ])
}
chrgenes <- read.delim('../../../data/Mus_musculus.GRCm38.68.chrgenes.txt', col.names = c('chr', 'gene'))
inTabs <- paste0("../../../data/kidney/full/",
                c("NEB", "SMARTseq10ng", "SMARTseq100pg"),
                "_processed_gene_extended2.txt")
inDF18 <- removeX(GetGatkPipelineTabs(inTabs, c(6,6,6)), chrgenes)
inDF18 
Ph <- c(0.68, 0.95, 0.98)
RESULT18_Q_humanread <- lapply(Ph, function(p){
  PerformDiffAIAnalysisFor2Conditions(inDF18, vect1CondReps=3:4, vect2CondReps=c(7,11), Q=p, fullOUT=T, BF=F)
})

DF18_covai <- RESULT18_Q_humanread[[1]]$deltaAIPairwise
DF18_covai <- DF18_covai[DF18_covai$group=="Condition2 01 vs 02", c("deltaAI", "MeanCov", "AI1")]

DF18_hobq <- do.call(rbind, lapply(1:length(RESULT18_Q_humanread), function(i){
  df = RESULT18_Q_humanread[[i]]$observedQuartiles
  df = df[df$condition=="Condition2" & df$ij=="01 vs 02", ]
  df[df$binNObservations >= 20, c("coverageBin", "deltaAI", "Q")]
}))

gg_covai <- ggplot() +
  geom_point(data=DF18_covai, aes(MeanCov, deltaAI), size=0.5, color="gray") +
  #geom_line(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(coverageBin, deltaAI, color=Q)) +
  #geom_point(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(coverageBin, deltaAI, color=Q), size=0.5) +
  labs(x = "Mean Gene Coverage", y = "Allelic Imbalance difference") +
  ggtitle("") + 
  theme_bw() +
  theme(legend.position = c(0.8, 0.8), text = element_text(size=12)) +
  scale_x_log10() +
  geom_vline(xintercept = 8, linetype="dashed") +
  geom_point(aes(x=181, y=0.3), colour="red") +
  geom_point(aes(x=148, y=0.3), colour="blue") +
  geom_point(aes(x=113, y=0.4), colour="green")
  #scale_y_log10()
cowplot::save_plot(gg_covai, file="Fig_tree_with_Qs_emplty_thr10.pdf", base_height = 4)

gg_covai <- ggplot() +
  geom_point(data=DF18_covai, aes(MeanCov, deltaAI), size=0.5, color="gray") +
  geom_line(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(coverageBin, deltaAI, color=Q)) +
  geom_point(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(coverageBin, deltaAI, color=Q), size=0.5) +
  labs(x = "Mean Gene Coverage", y = "Allelic Imbalance difference") +
  ggtitle("") + 
  theme_bw() +
  theme(legend.position = c(0.8, 0.8), text = element_text(size=12)) +
  scale_x_log10() 
  #geom_vline(xintercept = 112, linetype="dashed") +
  #geom_vline(xintercept = 146, linetype="dashed")
  #scale_y_log10()
cowplot::save_plot(gg_covai, file="Fig_tree_with_Qs_logx.pdf", base_height = 4)

gg_covai_lin <- ggplot(data=DF18_hobq[DF18_hobq$coverageBin>=10, ], aes(x=coverageBin, y=deltaAI, group=Q, color=Q)) +
  geom_point(size=0.5) + 
  theme_bw() + 
  ggtitle("") + 
  xlab('Mean Gene Coverage') + ylab('Allelic Imbalance difference') +
  theme(legend.position=c(0.8, 0.8), text = element_text(size=12)) +
  scale_x_continuous(trans='log2') +
  scale_y_continuous(trans='log2') +
  geom_smooth(method='lm', formula = y ~ offset(-0.5*x), aes(group=Q)) +
  theme(panel.background = element_rect(fill = 'white', colour = 'white'),
        legend.key = element_rect(fill = F, colour = "white")) +
  guides(color=guide_legend(override.aes=list(fill=NA)))

cowplot::save_plot(gg_covai_lin, file="Fig_parallel_lines.pdf", base_height = 4)


HQint <- sapply(1:length(Ph), function(i){RESULT18_Q_humanread[[i]]$intercepts$Condition2}$linInt)
HQint_Zscore <- rbind(data.frame(quantile = Ph, 
                                observation = HQint/HQint[2], 
                                x = "Correction constant"),
                     data.frame(quantile = Ph, 
                                observation = qnorm(Ph + (1-Ph)/2) / qnorm(0.84), 
                                x = "Binomial Z-score"))
HQint_Zscore$x <- factor(HQint_Zscore$x)

gg_qcoeff  <- ggplot(HQint_Zscore, aes(x, observation, color=x)) +
  geom_point(aes(color=x)) + 
  geom_label_repel(aes(label = quantile),
                  box.padding   = 0.35, 
                  point.padding = 0.5,
                  segment.color = 'grey50',
                  direction     = "y",
                  hjust         = 0) + 
  labs(x = "Type", y = "sd proportion") +
  ggtitle("") +
  theme_bw() +
  theme(legend.position = c(0.5, 0.9), text = element_text(size=12), legend.title = element_blank())   

Plot with coefficients:

# load the data
method_lib <- c("NEB 100ng", "SMARTseq 10 ng", "SMARTseq 100 pg")
INTC18_90_humanread <- do.call(rbind, lapply(0:2, function(i){
  C = (1:6)+i*6    
  data.frame(PerformDiffAIAnalysisFor2Conditions(inDF18, vect1CondReps=3:4, 
                                                 vect2CondReps=C, Q=0.95, fullOUT=T, BF=F)$intercepts$Condition2[, c("ij","linInt")],
             method = method_lib[i+1])
}))
INTC18_90_humanread$method <- factor(INTC18_90_humanread$method)
# plot with jitter
gg_mcoeff1  <- ggplot(INTC18_90_humanread[INTC18_90_humanread$method=="SMARTseq 10 ng",], aes(x="SMART 10ng",y=linInt)) +
  geom_jitter(col="#00BA38") + 
  labs(x = "Lib Preparation Method", y = "Intercept") +
  theme_bw() +
  ylim(0, 2.5)
cowplot::save_plot(gg_mcoeff1, file="Fig_coeff_SMART10.pdf", base_height = 4)


gg_mcoeff  <- ggplot(INTC18_90_humanread, aes(method, linInt, color=method)) +
  geom_jitter(aes(color=method)) + 
  labs(x = "Lib Preparation Method", y = "Intercept") +
  ggtitle("") +
  theme_bw() +
  theme(legend.position = "None", text = element_text(size=12)) +
  ylim(0, 4.6)
cowplot::save_plot(gg_mcoeff, file="Fig_coeff.pdf", base_height = 4)


gimelbrantlab/ASE documentation built on March 1, 2020, 5:41 a.m.