knitr::opts_chunk$set(echo = FALSE,message = FALSE,warning = FALSE)
library(rtracklayer) library(AnnotationHub) library(Rsamtools) library(GenomicRanges) library(Gviz) library(GenomicFeatures) library(biomaRt) library(openxlsx) library(tidyverse) source('~/Main/anne/epigenetics/nucleosome_analysis/functions.R') scer <- import(con='~/Downloads/ucscgenes.bed') setwd('~/Main/pgsNetwork/vignettes/') summ <- read.csv('~/Main/SUMMARY_2012.csv') summ.gene <- summ$orf_name[summ$category=='gene'] grl<-scer[scer$name%in%summ.gene] pgs <- read_csv('../analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv') pgs <- pgs$orf_name pgs.data <- scer[scer$name%in%pgs] tif <- read_delim('~/Main/anne/epigenetics/tif/S1_TIFs.txt',delim=' ') #tif <- read_delim('../tif/S2_tcd_mTIFAnno.txt',delim=' ') tif <- transform(tif, start = ifelse(strand == '-', t3, t5), end = ifelse(strand == '-', t5, t3)) %>% mutate(chr=recode(chr,`1`='chrI',`2`='chrII',`3`='chrIII',`4`='chrIV',`5`='chrV',`6`='chrVI', `7`='chrVII',`8`='chrVIII',`9`='chrIX',`10`='chrX',`11`='chrXI',`12`='chrXII',`13`='chrXIII', `14`='chrXIV',`15`='chrXV',`16`='chrXVI')) tif_ <- read_delim('~/Main/anne/epigenetics/tif/S1_TIFs.txt',delim=' ',col_names = paste0("V",seq_len(12))) colnames(tif_)[1:8] <- tif_[1,1:8] colnames(tif_)[11] <- 'orf_name' tif_ <- tif_[-1,] %>% filter(is.na(orf_name)==F) pgs.upstream <- promoters(pgs.data,upstream=300,downstream=0) pgs.downstream <- expandRange(pgs.data,upstream=0,downstream=200) pgs.tifreads <- data.frame(orf_name=pgs.data$name,ypd=NA,gal=NA,count=NA) for(i in 1:length(pgs.data)){ pgs.tifreads$ypd[i] <- mean(as.numeric(c(tif_$ypd[tif_$orf_name==pgs.data$name[i]]))) pgs.tifreads$gal[i] <- mean(as.numeric(c(tif_$gal[tif_$orf_name==pgs.data$name[i]]))) pgs.tifreads$count[i] <- nrow(tif_[tif_$orf_name==pgs.data$name[i],]) } pgs.tifreads
library(rtracklayer) library(AnnotationHub) library(Rsamtools) library(GenomicRanges) library(Gviz) library(GenomicFeatures) library(biomaRt) library(openxlsx) scer <- import(con='~/Downloads/ucscgenes.bed') tss_park <- import(con='~/Main/anne/epigenetics/tif/park.bed',format = 'bed') summ <- read.csv('~/Main/SUMMARY_2012.csv') summ.gene <- summ$orf_name[summ$category=='gene'] grl<-scer[scer$name%in%summ.gene] tif <- read_delim('~/Main/anne/epigenetics/tif/S1_TIFs.txt',delim=' ') #tif <- read_delim('../tif/S2_tcd_mTIFAnno.txt',delim=' ') tif <- transform(tif, start = ifelse(strand == '-', t3, t5), end = ifelse(strand == '-', t5, t3)) %>% mutate(chr=recode(chr,`1`='chrI',`2`='chrII',`3`='chrIII',`4`='chrIV',`5`='chrV',`6`='chrVI', `7`='chrVII',`8`='chrVIII',`9`='chrIX',`10`='chrX',`11`='chrXI',`12`='chrXII',`13`='chrXIII', `14`='chrXIV',`15`='chrXV',`16`='chrXVI')) tif_rs <- GRanges(seqnames = tif$chr,strand = tif$strand,ranges = IRanges(start=ifelse(tif$strand=='+',tif$start,tif$end),end = ifelse(tif$strand=='+',tif$start,tif$end)),DataFrame(score=tif$ypd,gal=tif$gal)) tif_re <- GRanges(seqnames = tif$chr,strand = tif$strand,ranges = IRanges(start=ifelse(tif$strand=='+',tif$end,tif$start),end =ifelse(tif$strand=='+',tif$end,tif$start)),DataFrame(score=tif$ypd,gal=tif$gal)) gene.prm <- promoters(grl,upstream=200,downstream=0) gene.prm$tss <- NA pranges <- GRanges() for(i in 1:length(gene.prm)){ p <- gene.prm[i] tss_ <- median(start(tif_rs[tif_rs%over%p])) gene.prm[i]$tss <- tss_ if(is.na(tss_)==F){ pranges <- append(pranges,GRanges(seqnames = seqnames(p),strand=strand(p),ranges=IRanges(start=tss_,end=tss_),DataFrame(name=p$name,score=p$score))) } } tss_tif <- pranges park.diff <- setdiff(tss_park$name,tss_tif$name) pgs.data <- scer[scer$name%in%pgs] tss_comb <- append(tss_tif,tss_park) tss <- tss_tif[tss_tif$name%in%setdiff(tss_tif$name,pgs.data$name)]#tss_comb[tss_comb$name%in%setdiff(tss_comb$name,pgs.data$name)] dist.list <- c() dist.list.data <- data.frame(name=NA, dist=NA) for(i in 1:length(pgs.data)){ tryCatch({ nn <- GRanges(seqnames = seqnames(pgs.data[i]),ranges = ranges(pgs.data[i]),strand = '-') np <- GRanges(seqnames = seqnames(pgs.data[i]),ranges = ranges(pgs.data[i]),strand = '+') o <- queryHits(findOverlaps(tss,pgs.data[i],ignore.strand=T)) if(length(o)!=0){ #u <- follow(nn,tss) #d <- nearest(pgs.data[i],tss,ignore.strand=T) print(i) dist.list[[ mcols(pgs.data[i])$name]] <- list(list(pgs.data[i],tss[o])) dist.list.data <- rbind(dist.list.data,data.frame(name=mcols(pgs.data)$name[i],dist=0)) }else{ #d <- nearest(pgs.data[i],tss,ignore.strand=T) u <- precede(np,tss) d <- precede(nn,tss) du <- distance(tss[u],pgs.data[i],ignore.strand=T) dd <- distance(tss[d],pgs.data[i],ignore.strand=T) if(du<dd){ dist.list[[ mcols(pgs.data[i])$name]] <- list(list(pgs.data[i],tss[d])) dist.list.data <- rbind(dist.list.data,data.frame(name=mcols(pgs.data)$name[i],dist=du)) }else{ dist.list[[ mcols(pgs.data[i])$name]] <- list(list(pgs.data[i],tss[d])) dist.list.data <- rbind(dist.list.data,data.frame(name=mcols(pgs.data)$name[i],dist=dd)) } # dist.list[[ mcols(pgs.data[i])$name]] <- list(list(pgs.data[i],tss[d])) # dist.list.data <- append(dist.list.data,distance(tss[d],pgs.data[i],ignore.strand=T)) # names(dist.list.data)[i] <-mcols(pgs.data)$name[i] } }, error = function(e) NULL ) #dist.list <- append(dist.list,mcols(distanceToNearest(pgs.data[i],tss_park,ignore.strand=T))) } dist.list.data <- dist.list.data[-1,] rhee.all <- Sys.glob('~/Main/anne/epigenetics/Rhee_2011/*.bed') rhee.dat <- lapply(rhee.all, import) %>% unlist() %>% GenomicRangesList() %>% unlist() rhee.pgs <- c() j=0 for( i in 1:length(pgs)){ c <- countOverlaps(pgs.data[i],rhee.dat) if(c>0){ #print(paste(c,mcols(pgs.data)$name[i],i)) rhee.pgs <- append(rhee.pgs,pgs.data$name[i]) j <- j+1 } } pgs.df <- data.frame(orf_name=pgs,tss=NA) for(i in 1:length(pgs)){ pgs.df$tss[i] <- dist.list.data[dist.list.data$name==as.character(pgs.df$orf_name[i]),2] }
library(openxlsx) library(NucleoATACR) library(tidyverse) library(dplyr) library(GenomicRanges) library(BBmisc) library(rtracklayer) library(RColorBrewer) source('~/Main/anne/epigenetics/nucleosome_analysis/functions.R') set.seed(1) scer <- import(con='~/Downloads/ucscgenes.bed') pgs.data <- scer[scer$name%in%pgs] nucs <- readNucs('~/Main/anne/epigenetics/nucleosome_occupancy/lin_all.nucpos.bed')%>%flank(74,both=T,ignore.strand=T)#"test.nucmap_combined.bed.gz") #tif.data <- read_tsv('../../../../pgs/tif_count.tsv') #%>% arrange(order(orf_name)) #tif.data <- tif.data[order(tif.data$orf_name),] #nfrs <- import('~/Main/anne/epigeneticsnucleosome_occupancy/lin_all.occ.bw')#"test.nfrpos.bed.gz")
source('../R/add.degree.R') overlappingorfs <- read_csv("../analysis/data/raw_data/overlappingorfs.csv") net.df <- readRDS("../analysis/data/derived_data/SGA_data_combined.rds.gz") strain_ids <- read_csv("../analysis/data/derived_data/strain_ids.csv") allele.frame <- readRDS("../analysis/data/derived_data/df_different_alleles.rds") nones.exp.data <- read_csv("../analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") exp.data <- read_csv("../analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs,'proto-gene',maincat))#ifelse(`Systematic gene name`%in%nones.exp.data$`Systematic gene name`==F,'essential','nonessential'))) net_df_significant <- filter(net.df, `P-value` <= 0.05 & `Query Strain ID` %in% overlappingorfs$orf_name == FALSE & `Array Strain ID` %in% overlappingorfs$orf_name == FALSE)#& str_detect(`Query allele name`,'supp')==F & str_detect(`Query allele name`,'supp')==F) pgs_allele <- strain_ids %>% filter(`Systematic gene name` %in% pgs) %>% dplyr::select(`Allele Gene name`) %>% pull() library(igraph) library(coin) thr <- -0.2 allnet <- graph_from_data_frame(net_df_significant[,c(2,4,6)],directed=FALSE) adj <- as_adjacency_matrix(allnet,type='both',attr="Genetic interaction score (ε)") %>% as.matrix() net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= thr)#& `Double mutant fitness standard deviation` <= 0.1) allnet_sl <- graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed=FALSE)#%>% igraph::simplify() colname <- colnames(nones.exp.data)[1] #adj_sl <- as_adjacency_matrix(allnet_sl,type='both',attr="Genetic interaction score (ε)") %>% as.matrix() allnet_sl_deg <- degree(allnet_sl) pgs_deg <- allnet_sl_deg[names(allnet_sl_deg) %in% pgs] nones.exp.data <- nones.exp.data %>% add.degree(allnet_sl, colname) %>% dplyr::mutate(int_density = deg / num,group=ifelse(`Systematic gene name`%in%pgs,'Proto-gene','Nonessential')) exp.data <- exp.data %>% add.degree(allnet_sl, colname) %>% dplyr::mutate(int_density = deg / num) # pgs_min <- map(pgs_allele[pgs_allele%in%V(allnet_sl)$name],function(x) { # d=adj_sl[,x] # d[d <= thr] # } ) #names(pgs_min) <- pgs_allele[pgs_allele%in%V(allnet_sl)$name] adj[adj==0] <- NA pgs_min <- map(pgs_allele,function(x) { d=min(adj[,x],na.rm=T) d #d[d < thr]#%>%as.numeric() } ) %>% unlist() names(pgs_min) <- pgs_allele #adj[,'ybl008w-a'] ggplot(nones.exp.data %>% filter(int_density>=0),aes(x=group,y=int_density))+ stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1)) independence_test(group~int_density,data = nones.exp.data %>% mutate(group=as.factor(group)) %>% filter(int_density>=0))
``{R eval=FALSE}
library(coin)
for(i in 1:nrow(pgs.df)){
name <- pgs.df$orf_name[i] %>% as.character()
width <- width(pgs.data[pgs.data$name==name])
deg <- pgs_deg[names(pgs_deg)==name]
# strain_ids %>%
# filter(
Systematic gene name==name) %>%
# dplyr::select(
Allele Gene name) %>%
# pull()]
min <- pgs_min[names(pgs_min)==strain_ids %>%
filter(
Systematic gene name==name) %>%
dplyr::select(
Allele Gene name) %>%
pull()]
pgs.df$deg[i] <- ifelse(length(deg)==0,0,deg)
pgs.df$min[i] <- ifelse(length(min)==0,NA,min)
#tss.d <- tss.dist[tss.dist$orf_name==name,]
pgs.df$tss.bool[i] <- ifelse(pgs.df$tss[i]>200,1,0)
pgs.df$nucs[i] <- ifelse(countOverlaps(pgs.data[i],nucs)>0,1,0)#pgs.atac$data[pgs.atac$orf_name==name]
pgs.df$int_dens[i] <- nones.exp.data$int_density[nones.exp.data$
Systematic gene name`==name]
}
library(ggpubr) library(ggsignif)
#+scale_x_log10()+scale_y_log10()
convertBool <- function(x) ifelse(x>0,1,0)
p=pvalue(independence_test(nucs~deg,data=pgs.df %>% mutate(nucs=as.factor(nucs)))) nucs_deg <- ggplot(pgs.df %>% mutate(nucs=as.factor(nucs)),aes(x=nucs,y=deg))+ stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1),fill='lightblue') + stat_summary(fun.data = mean_se,width=0.5, geom = "errorbar",position = position_dodge(width=1))+theme_minimal()+ylab('Number of synthetic\nlethal interactions')+xlab('Nucleosome occupied')+ scale_x_discrete(labels=c('No','Yes'))+theme(axis.title = element_text(size=8),axis.text = element_text(size=6),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+coord_cartesian(ylim = c(0,18))#+ geom_signif(annotations=c(paste0("p=",formatC(p,digits=3))),xmin=1,xmax=2, y_position=17,tip_length = 0.005,textsize = 5)
p=pvalue(independence_test(nucs~min,data=pgs.df %>% mutate(nucs=as.factor(nucs))%>%unnest())) nucs_min <- ggplot(pgs.df %>% mutate(nucs=as.factor(nucs))%>%unnest(),aes(x=nucs,y=abs(min)))+ stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1),fill='lightblue') + stat_summary(fun.data = mean_se,width=0.5, geom = "errorbar",position = position_dodge(width=1))+theme_minimal()+ylab('Strongest negative\n interaction')+xlab('Nucleosome occupied')+ scale_x_discrete(labels=c('No','Yes'))+theme(axis.title = element_text(size=8),axis.text = element_text(size=6),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+coord_cartesian(ylim = c(0,0.8))#+ geom_signif(annotations=c(paste0("p=",formatC(p,digits=3))),xmin=1,xmax=2, y_position=0.75,tip_length = 0.05,textsize = 5)
nucs_grid <- cowplot::plot_grid(nucs_deg,nucs_min,nrow=2) ggsave('../analysis/figures/0812_poster/nfr_data_03.pdf',nucs_grid,width=3,height=6,units = 'in')
p=pvalue(independence_test(tss.bool~deg,data=pgs.df %>% mutate(tss.bool=as.factor(tss.bool))))
tss_deg <- ggplot(pgs.df %>% mutate(tss.bool=as.factor(tss.bool)),aes(x=tss.bool,y=deg))+ stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1),fill='lightblue') + stat_summary(fun.data = mean_se,width=0.5, geom = "errorbar",position = position_dodge(width=1))+theme_minimal()+ylab('Number of synthetic\nlethal interactions')+xlab('Distance to downstream \nTSS >200bp')+ scale_x_discrete(labels=c('No','Yes'))+theme(axis.title = element_text(size=8),axis.text = element_text(size=6),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+coord_cartesian(ylim = c(0,18))#+ geom_signif(annotations=c(paste0("p=",formatC(p,digits=3))),xmin=1,xmax=2, y_position=17,tip_length = 0.005,textsize = 5)
p=pvalue(independence_test(tss.bool~min,data=pgs.df %>% mutate(tss.bool=as.factor(tss.bool))%>%unnest())) tss_min <- ggplot(pgs.df %>% mutate(tss.bool=as.factor(tss.bool))%>%unnest(),aes(x=tss.bool,y=abs(min)))+ stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1),fill='lightblue') + # stat_summary(fun.data = n_fun, geom = "text",position=position_dodge(width=1))+ #stat_summary(aes(label=round(..y..,2)), fun.y=mean, geom="text", size=6,vjust = -0.5)+ stat_summary(fun.data = mean_se,width=0.5, geom = "errorbar",position = position_dodge(width=1))+theme_minimal()+ylab('Strongest negative\n interaction')+xlab('Distance to downstream \nTSS >200bp')+ scale_x_discrete(labels=c('No','Yes'))+theme(axis.title = element_text(size=8),axis.text = element_text(size=6),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank())+coord_cartesian(ylim = c(0,0.8))#+ geom_signif(annotations=c(paste0("p=",formatC(p,digits=3))),xmin=1,xmax=2, y_position=0.75,tip_length = 0.05,textsize = 5)
tss_grid <- cowplot::plot_grid(tss_deg,tss_min,nrow=2) cowplot::plot_grid(nucs_deg,nucs_min,tss_deg,tss_min) ggsave('../analysis/figures/0816_paper/tssnucs_data.pdf',width=4,height=4,units = 'in')
map(pgs.df$min,length)%>%unlist library(coin) independence_test(tss.bool~min,data=pgs.df %>% mutate(tss.bool=as.factor(tss.bool))) independence_test(nucs~min,data=pgs.df %>% mutate*nucs=as.factor(nucs)))
alldata.bool <- as_tibble(pgs.df) %>% mutate_if(is.numeric,convertBool)
fisher.test(with(alldata.bool,table(nucs,int_dens))) fisher.test(with(alldata.bool,table(tss.bool,int_dens))) with(alldata.bool,table(tss,deg)) ft
```r net_df_significant <- net_df_significant %>% mutate(group=ifelse(`Query Strain ID`%in%pgs|`Array Strain ID`%in%pgs,'proto-gene',ifelse(`Query Strain ID`%in%nones.exp.data$`Systematic gene name`==F&`Array Strain ID`%in%nones.exp.data$`Systematic gene name`==F,'essential','nonessential'))) ggplot(net_df_significant,aes(y=`Double mutant fitness standard deviation`,x=`Genetic interaction score (ε)`))+geom_point(aes(color=group))+xlim(NA,-0.2)+ylim(0,0.2) ggplot(net_df_significant%>%filter(`Genetic interaction score (ε)`<=-0.35),aes(y=`Double mutant fitness standard deviation`,x=group))+ stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1)) ggplot(net_df_significant%>%filter(`Genetic interaction score (ε)`<=-0.2),aes(x=`Double mutant fitness standard deviation`,color=group,y=..ndensity..))+geom_freqpoly(binwidth=0.01)+geom_vline(xintercept = 0.1) # stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + # stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1)) n_fun <- function(x){ return(data.frame(y = median(x)*1.15, label = paste0(length(x)))) } exp.data <- exp.data %>% mutate(bin = as.factor(bin))#num_cat = case_when( ggplot(nones.exp.data %>% filter(int_density>=0),aes(color=group,x=int_density))+geom_freqpoly(aes(y=..density..),binwidth=0.1)+scale_x_log10() independence_test(group~int_density,exp.data%>%filter(group!='essential'&cat!='nxes.only')%>%mutate(group=factor(group))) pdf('../analysis/figures/0709_interactiondensity/03501sdgroupedintdensity.pdf') #ggplot(exp.data,aes(x=deg,y=num,color=group))+geom_point()#+scale_x_log10()+scale_x_log10() pltdata=exp.data%>%filter(deg>0&group!='essential'&bin%in%exp.data$bin[exp.data$group=='proto-gene']&cat%in%exp.data$cat[exp.data$group=='proto-gene']) ggplot(pltdata,aes(x=bin,y=int_density,color=group,fill=cat))+#geom_boxplot() stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1))+facet_grid(cat~.) pdf('../analysis/figures/0709_interactiondensity/intdensity_nonespgs_nonxesonly02.pdf') ,color=group ggplot(exp.data%>%filter(group!='essential'&cat!='nxes.only'),aes(x=group,y=int_density,fill=group))+#geom_boxplot() stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1))+ stat_summary(fun.data = n_fun, geom = "text",position=position_dodge(width=1))+ylab("Interaction density")+xlab("")+ coord_cartesian(ylim=c(0, 0.008)) dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.