knitr::opts_chunk$set(echo = FALSE,message = FALSE,warning = FALSE)
library(rtracklayer) library(AnnotationHub) library(Rsamtools) library(GenomicRanges) library(Gviz) library(GenomicFeatures) library(biomaRt) library(openxlsx) library(tidyverse) source('~/Main/anne/epigenetics/nucleosome_analysis/functions.R') scer <- import(con='~/Downloads/ucscgenes.bed') setwd('~/Main/pgsNetwork/vignettes/') summ <- read.csv('~/Main/SUMMARY_2012.csv') summ.gene <- summ$orf_name[summ$category=='gene'] grl<-scer[scer$name%in%summ.gene] pgs <- read_csv('../data/0701_proto-genes_list') pgs <- pgs$orf_name gene <- scer$name gene.data <- scer#$[scer$name%in%pgs] tif <- read_delim('~/Main/anne/epigenetics/tif/S1_TIFs.txt',delim=' ') #tif <- read_delim('../tif/S2_tcd_mTIFAnno.txt',delim=' ') tif <- transform(tif, start = ifelse(strand == '-', t3, t5), end = ifelse(strand == '-', t5, t3)) %>% mutate(chr=recode(chr,`1`='chrI',`2`='chrII',`3`='chrIII',`4`='chrIV',`5`='chrV',`6`='chrVI', `7`='chrVII',`8`='chrVIII',`9`='chrIX',`10`='chrX',`11`='chrXI',`12`='chrXII',`13`='chrXIII', `14`='chrXIV',`15`='chrXV',`16`='chrXVI')) tif_ <- read_delim('~/Main/anne/epigenetics/tif/S1_TIFs.txt',delim=' ',col_names = paste0("V",seq_len(12))) colnames(tif_)[1:8] <- tif_[1,1:8] colnames(tif_)[11] <- 'orf_name' tif_ <- tif_[-1,] %>% filter(is.na(orf_name)==F) gene.upstream <- promoters(gene.data,upstream=300,downstream=0) gene.downstream <- expandRange(gene.data,upstream=0,downstream=200) gene.tifreads <- data.frame(orf_name=gene.data$name,ypd=NA,gal=NA,count=NA) for(i in 1:length(gene.data)){ gene.tifreads$ypd[i] <- mean(as.numeric(c(tif_$ypd[tif_$orf_name==gene.data$name[i]]))) gene.tifreads$gal[i] <- mean(as.numeric(c(tif_$gal[tif_$orf_name==gene.data$name[i]]))) gene.tifreads$count[i] <- nrow(tif_[tif_$orf_name==gene.data$name[i],]) } gene.tifreads
library(rtracklayer) library(AnnotationHub) library(Rsamtools) library(GenomicRanges) library(Gviz) library(GenomicFeatures) library(biomaRt) library(openxlsx) scer <- import(con='~/Downloads/ucscgenes.bed') tss_park <- import(con='~/Main/anne/epigenetics/tif/park.bed',format = 'bed') summ <- read.csv('~/Main/SUMMARY_2012.csv') summ.gene <- summ$orf_name[summ$category=='gene'] grl<-scer[scer$name%in%summ.gene] tif <- read_delim('~/Main/anne/epigenetics/tif/S1_TIFs.txt',delim=' ') #tif <- read_delim('../tif/S2_tcd_mTIFAnno.txt',delim=' ') tif <- transform(tif, start = ifelse(strand == '-', t3, t5), end = ifelse(strand == '-', t5, t3)) %>% mutate(chr=recode(chr,`1`='chrI',`2`='chrII',`3`='chrIII',`4`='chrIV',`5`='chrV',`6`='chrVI', `7`='chrVII',`8`='chrVIII',`9`='chrIX',`10`='chrX',`11`='chrXI',`12`='chrXII',`13`='chrXIII', `14`='chrXIV',`15`='chrXV',`16`='chrXVI')) tif_rs <- GRanges(seqnames = tif$chr,strand = tif$strand,ranges = IRanges(start=ifelse(tif$strand=='+',tif$start,tif$end),end = ifelse(tif$strand=='+',tif$start,tif$end)),DataFrame(score=tif$ypd,gal=tif$gal)) tif_re <- GRanges(seqnames = tif$chr,strand = tif$strand,ranges = IRanges(start=ifelse(tif$strand=='+',tif$end,tif$start),end =ifelse(tif$strand=='+',tif$end,tif$start)),DataFrame(score=tif$ypd,gal=tif$gal)) gene.prm <- promoters(grl,upstream=200,downstream=0) gene.prm$tss <- NA pranges <- GRanges() for(i in 1:length(gene.prm)){ p <- gene.prm[i] tss_ <- median(start(tif_rs[tif_rs%over%p])) gene.prm[i]$tss <- tss_ if(is.na(tss_)==F){ pranges <- append(pranges,GRanges(seqnames = seqnames(p),strand=strand(p),ranges=IRanges(start=tss_,end=tss_),DataFrame(name=p$name,score=p$score))) } } tss_tif <- pranges park.diff <- setdiff(tss_park$name,tss_tif$name) gene.data <- scer#[scer$name%in%pgs] tss_comb <- append(tss_tif,tss_park) tss <- tss_tif#[tss_tif$name%in%setdiff(tss_tif$name,gene.data$name)]#tss_comb[tss_comb$name%in%setdiff(tss_comb$name,gene.data$name)] dist.list <- c() dist.list.data <- data.frame(name=NA, dist=NA) for(i in 1:length(gene.data)){ tryCatch({ tss_ <- tss[tss$name!=gene.data[i]$name] nn <- GRanges(seqnames = seqnames(gene.data[i]),ranges = ranges(gene.data[i]),strand = '-') np <- GRanges(seqnames = seqnames(gene.data[i]),ranges = ranges(gene.data[i]),strand = '+') o <- queryHits(findOverlaps(tss_,gene.data[i],ignore.strand=T)) if(length(o)!=0){ #u <- follow(nn,tss_) #d <- nearest(gene.data[i],tss_,ignore.strand=T) print(i) dist.list[[ mcols(gene.data[i])$name]] <- list(list(gene.data[i],tss_[o])) dist.list.data <- rbind(dist.list.data,data.frame(name=mcols(gene.data)$name[i],dist=0)) }else{ #d <- nearest(gene.data[i],tss_,ignore.strand=T) u <- precede(np,tss_) d <- precede(nn,tss_) du <- distance(tss_[u],gene.data[i],ignore.strand=T) dd <- distance(tss_[d],gene.data[i],ignore.strand=T) if(du<dd){ dist.list[[ mcols(gene.data[i])$name]] <- list(list(gene.data[i],tss_[d])) dist.list.data <- rbind(dist.list.data,data.frame(name=mcols(gene.data)$name[i],dist=du)) }else{ dist.list[[ mcols(gene.data[i])$name]] <- list(list(gene.data[i],tss_[d])) dist.list.data <- rbind(dist.list.data,data.frame(name=mcols(gene.data)$name[i],dist=dd)) } # dist.list[[ mcols(gene.data[i])$name]] <- list(list(gene.data[i],tss_[d])) # dist.list.data <- append(dist.list.data,distance(tss_[d],gene.data[i],ignore.strand=T)) # names(dist.list.data)[i] <-mcols(gene.data)$name[i] } }, error = function(e) NULL ) #dist.list <- append(dist.list,mcols(distanceToNearest(gene.data[i],tss__park,ignore.strand=T))) } dist.list.data <- dist.list.data[-1,] rhee.all <- Sys.glob('~/Main/anne/epigenetics/Rhee_2011/*.bed') rhee.dat <- lapply(rhee.all, import) %>% unlist() %>% GenomicRangesList() %>% unlist() rhee.gene <- c() j=0 for( i in 1:length(gene)){ c <- countOverlaps(gene.data[i],rhee.dat) if(c>0){ #print(paste(c,mcols(gene.data)$name[i],i)) rhee.gene <- append(rhee.gene,gene.data$name[i]) j <- j+1 } } gene.df <- data.frame(orf_name=gene,tss=NA) for(i in 1:length(gene)){ gene.df$tss[i] <- ifelse(gene.df$orf_name[i]%in%dist.list.data$name,dist.list.data[dist.list.data$name==as.character(gene.df$orf_name[i]),2],NA) }
library(openxlsx) library(NucleoATACR) library(tidyverse) library(dplyr) library(GenomicRanges) library(BBmisc) library(rtracklayer) library(RColorBrewer) #source('~/Main/anne/epigenetics/nucleosome_occupancy/functions.R') set.seed(1) scer <- import(con='~/Downloads/ucscgenes.bed') gene.data <- scer#[scer$name%in%pgs] nucs <- readNucs('~/Main/anne/epigenetics/nucleosome_occupancy/lin_all.nucpos.bed')%>%flank(74,both=T,ignore.strand=T)#"test.nucmap_combined.bed.gz") #tif.data <- read_tsv('../../../../pgs/tif_count.tsv') #%>% arrange(order(orf_name)) #tif.data <- tif.data[order(tif.data$orf_name),] #nfrs <- import('~/Main/anne/epigeneticsnucleosome_occupancy/lin_all.occ.bw')#"test.nfrpos.bed.gz")
source('../R/add.degree.R') overlappingorfs <- read_csv("../analysis/data/raw_data/overlappingorfs.csv") net.df <- readRDS("../analysis/data/derived_data/SGA_data_combined.rds.gz") strain_ids <- read_csv("../analysis/data/derived_data/strain_ids.csv") allele.frame <- readRDS("../analysis/data/derived_data/df_different_alleles.rds") nones.exp.data <- read_csv("../analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") exp.data <- read_csv("../analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs,'proto-gene',ifelse(`Systematic gene name`%in%nones.exp.data$`Systematic gene name`==F,'essential','nonessential'))) net_df_significant <- filter(net.df, `P-value` <= 0.05 & `Query Strain ID` %in% overlappingorfs$orf_name == FALSE & `Array Strain ID` %in% overlappingorfs$orf_name == FALSE)#& str_detect(`Query allele name`,'supp')==F & str_detect(`Query allele name`,'supp')==F) pgs_allele <- strain_ids %>% filter(`Systematic gene name` %in% pgs) %>% dplyr::select(`Allele Gene name`) %>% pull() library(igraph) library(coin) thr <- -0.35 allnet <- graph_from_data_frame(net_df_significant[,c(2,4,6)],directed=FALSE) adj <- as_adjacency_matrix(allnet,type='both',attr="Genetic interaction score (ε)") %>% as.matrix() net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= thr)#& `Double mutant fitness standard deviation` <= 0.1) allnet_sl <- graph_from_data_frame(net_df_significant_sl[,c(2,4)],directed=FALSE)#%>% igraph::simplify() colname <- colnames(nones.exp.data)[2] #adj_sl <- as_adjacency_matrix(allnet_sl,type='both',attr="Genetic interaction score (ε)") %>% as.matrix() #allnet_sl_deg <- degree(allnet_sl) #pgs_deg <- allnet_sl_deg[names(allnet_sl_deg) %in% pgs_allele] nones.exp.data <- nones.exp.data %>% add.degree(allnet_sl, colname) %>% dplyr::mutate(int_density = deg / num,group=ifelse(`Systematic gene name`%in%pgs,'Proto-gene','Nonessential')) exp.data <- exp.data %>% add.degree(allnet_sl, colname) %>% dplyr::mutate(int_density = deg / num) # pgs_min <- map(strain_ids$`Allele Gene name`,function(x) { # d=adj_sl[,x] # d[d <= thr] # } ) #names(pgs_min) <- pgs_allele[pgs_allele%in%V(allnet_sl)$name] #adj[adj==0] <- NA gene_min <- map(colnames(adj),function(x) { d=adj[,x] %>% as.numeric() %>% min() #d[d < 0]#%>%as.numeric() } ) names(gene_min) <- colnames(adj) gene_min <- unlist(gene_min) adj[,'ybl008w-a'] ggplot(nones.exp.data %>% filter(int_density>=0),aes(x=group,y=int_density))+ stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1)) independence_test(group~int_density,data = nones.exp.data %>% mutate(group=as.factor(group)) %>% filter(int_density>=0))
net_df_significant <- net_df_significant %>% mutate(group=ifelse(`Query Strain ID`%in%pgs|`Array Strain ID`%in%pgs,'proto-gene',ifelse(`Query Strain ID`%in%nones.exp.data$`Systematic gene name`==F&`Array Strain ID`%in%nones.exp.data$`Systematic gene name`==F,'essential','nonessential'))) ggplot(net_df_significant,aes(y=`Double mutant fitness standard deviation`,x=`Genetic interaction score (ε)`))+geom_point(aes(color=group))+xlim(NA,-0.2)+ylim(0,0.2) ggplot(net_df_significant%>%filter(`Genetic interaction score (ε)`<=-0.35),aes(y=`Double mutant fitness standard deviation`,x=group))+ stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1)) ggplot(net_df_significant%>%filter(`Genetic interaction score (ε)`<=-0.2),aes(x=`Double mutant fitness standard deviation`,color=group,y=..ndensity..))+geom_freqpoly(binwidth=0.01)+geom_vline(xintercept = 0.1) # stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + # stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1)) n_fun <- function(x){ return(data.frame(y = median(x)*1.15, label = paste0(length(x)))) } exp.data <- exp.data %>% mutate(bin = as.factor(bin))#num_cat = case_when( ggplot(nones.exp.data %>% filter(int_density>=0),aes(color=group,x=int_density))+geom_freqpoly(aes(y=..density..),binwidth=0.1)+scale_x_log10() independence_test(group~int_density,exp.data%>%filter(group!='essential'&cat!='nxes.only')%>%mutate(group=factor(group))) pdf('../analysis/figures/0709_interactiondensity/03501sdgroupedintdensity.pdf') #ggplot(exp.data,aes(x=deg,y=num,color=group))+geom_point()#+scale_x_log10()+scale_x_log10() pltdata=exp.data%>%filter(deg>0&group!='essential'&bin%in%exp.data$bin[exp.data$group=='proto-gene']&cat%in%exp.data$cat[exp.data$group=='proto-gene']) ggplot(pltdata,aes(x=bin,y=int_density,color=group,fill=cat))+#geom_boxplot() stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1))+facet_grid(cat~.) pdf('../analysis/figures/0709_interactiondensity/intdensity_nonespgs_nonxesonly02.pdf') ,color=group ggplot(exp.data%>%filter(group!='essential'&cat!='nxes.only'),aes(x=group,y=int_density,fill=group))+#geom_boxplot() stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1))+ stat_summary(fun.data = n_fun, geom = "text",position=position_dodge(width=1))+ylab("Interaction density")+xlab("")+ coord_cartesian(ylim=c(0, 0.008)) dev.off()
library(coin) gene_deg <- degree(allnet_sl) for(i in 1:nrow(gene.df)){ name <- gene.df$orf_name[i] %>% as.character() width <- width(gene.data[gene.data$name==name]) deg <- gene_deg[names(gene_deg)==strain_ids %>% filter(`Systematic gene name` ==name) %>% dplyr::select(`Allele Gene name`) %>% pull()] min <- gene_min[names(gene_min)==strain_ids %>% filter(`Systematic gene name` ==name) %>% dplyr::select(`Allele Gene name`) %>% pull()] gene.df$deg[i] <- ifelse(length(deg)==0,0,deg) gene.df$min[i] <- ifelse(length(min)==0,NA,min) #tss.d <- tss.dist[tss.dist$orf_name==name,] gene.df$tss.bool[i] <- ifelse(gene.df$tss[i]>200,1,0) gene.df$nucs[i] <- ifelse(countOverlaps(gene.data[i],nucs)>0,1,0)#gene.atac$data[gene.atac$orf_name==name] gene.df$int_dens[i] <- ifelse(name%in%nones.exp.data$`Systematic gene name`,nones.exp.data$int_density[nones.exp.data$`Systematic gene name`==name],NA) } library(ggpubr) library(ggsignif) ggplot(gene.df%>%unnest(),aes(x=tss,y=abs(min)))+geom_point()+geom_smooth(method=lm, se=FALSE)+stat_cor(method = "pearson")+ ylab('negative interaction strength')+xlab('distance to tss')+theme_bw() ggsave('../analysis/figures/0708pres/negative02_distancetss_scatter.pdf') #+scale_x_log10()+scale_y_log10() convertBool <- function(x) ifelse(x>0,1,0) pdf('../analysis/figures/0730paper/nfr_degree_nosd_0.35.pdf') p=pvalue(independence_test(nucs~deg,data=gene.df %>% mutate(nucs=as.factor(nucs)))) ggplot(gene.df %>% mutate(nucs=as.factor(nucs)),aes(x=nucs,y=deg))+ stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1),fill='lightblue') + stat_summary(fun.data = mean_se,width=0.5, geom = "errorbar",position = position_dodge(width=1))+theme_minimal()+ylab('Number of synthetic lethal interactions')+xlab('Nucleosome occupied')+ scale_x_discrete(labels=c('No','Yes'))+theme(axis.title = element_text(size=18),axis.text = element_text(size=14),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+ geom_signif(annotations=c(paste0("p=",formatC(p,digits=3))),xmin=1,xmax=2, y_position=8,tip_length = 0.005) dev.off() pdf('../analysis/figures/0730paper/nfr_min_nosdthr.pdf') p=pvalue(independence_test(nucs~min,data=gene.df %>% mutate(nucs=as.factor(nucs))%>%unnest())) ggplot(gene.df %>% mutate(nucs=as.factor(nucs))%>%unnest(),aes(x=nucs,y=abs(min)))+ stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1),fill='lightblue') + stat_summary(fun.data = mean_se,width=0.5, geom = "errorbar",position = position_dodge(width=1))+theme_minimal()+ylab('Strongest negative interaction')+xlab('Nucleosome occupied')+ scale_x_discrete(labels=c('No','Yes'))+theme(axis.title = element_text(size=18),axis.text = element_text(size=14),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+ geom_signif(annotations=c(paste0("p=",formatC(p,digits=3))),xmin=1,xmax=2, y_position=1,tip_length = 0.005) dev.off() pdf('../analysis/figures/0730paper/tss_degree_nosdthr0.35.pdf') p=pvalue(independence_test(tss.bool~deg,data=gene.df%>% filter(is.na(tss.bool)==F) %>% mutate(tss.bool=as.factor(tss.bool)))) ggplot(gene.df %>% filter(is.na(tss.bool)==F) %>% mutate(tss.bool=as.factor(tss.bool)) ,aes(x=tss.bool,y=deg))+ stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1),fill='lightblue') + stat_summary(fun.data = mean_se,width=0.5, geom = "errorbar",position = position_dodge(width=1))+theme_minimal()+ylab('Number of synthetic lethal interactions')+xlab('Distance to downstream TSS >200bp')+ scale_x_discrete(labels=c('No','Yes'))+theme(axis.title = element_text(size=18),axis.text = element_text(size=14),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+ geom_signif(annotations=c(paste0("p=",formatC(p,digits=3))),xmin=1,xmax=2, y_position=8,tip_length = 0.005) dev.off() pdf('../analysis/figures/0730paper/tss_negintstr_02nosdthr.pdf') p=pvalue(independence_test(tss.bool~min,data=gene.df%>% filter(is.na(tss.bool)==F) %>% mutate(tss.bool=as.factor(tss.bool))%>%unnest())) ggplot(gene.df%>% filter(is.na(tss.bool)==F) %>% mutate(tss.bool=as.factor(tss.bool))%>%unnest(),aes(x=tss.bool,y=abs(min)))+ stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1),fill='lightblue') + # stat_summary(fun.data = n_fun, geom = "text",position=position_dodge(width=1))+ # stat_summary(aes(label=round(..y..,2)), fun.y=mean, geom="text", size=6, # vjust = -0.5)+ stat_summary(fun.data = mean_se,width=0.5, geom = "errorbar",position = position_dodge(width=1))+theme_minimal()+ylab('Strongest negative interaction')+xlab('Distance to downstream TSS >200bp')+ scale_x_discrete(labels=c('No','Yes'))+theme(legend.title=element_text(size=20),axis.title = element_text(size=18),axis.text = element_text(size=14),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+ geom_signif(annotations=c(paste0("p=",formatC(p,digits=3))),xmin=1,xmax=2, y_position=1,tip_length = 0.005) dev.off() map(gene.df$min,length)%>%unlist library(coin) independence_test(tss.bool~min,data=gene.df %>% mutate(tss.bool=as.factor(tss.bool))) independence_test(nucs~min,data=gene.df %>% mutate(nucs=as.factor(nucs))) alldata.bool <- as_tibble(gene.df)%>% filter(is.na(tss.bool)==F) %>% mutate_if(is.numeric,convertBool) fisher.test(with(alldata.bool,table(nucs,deg))) fisher.test(with(alldata.bool,table(tss,deg))) with(alldata.bool,table(tss,deg)) ft
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.