R/calculateInteractionDensity.R

Defines functions n_fun

library(tidyverse)
library(igraph)
library(ggsignif)
library(cowplot)
source("R/add.degree.R")
##Figure 1c----------
#pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv")
#pgs <- read_csv("~/Downloads/omersyntenyfilter.csv")
overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
net.df <- readRDS("analysis/data/derived_data/SGA_data_combined.rds.gz")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
net_df_significant <- filter(net.df, `P-value` <= 0.05 & `Query Strain ID` %in% overlappingorfs$orf_name == FALSE & `Array Strain ID` %in% overlappingorfs$orf_name == FALSE)

net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.2)#& `Double mutant fitness standard deviation`<=0.1)

allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs$orf_name,'proto-gene',maincat))
try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,]
lengths <- lapply(try$different.alleles, length)
try.multiple <- try[lengths > 1, ]
try.single <- try[lengths == 1, ]
ess.names <- exp.data$`Systematic gene name`[exp.data$maincat=='essential']


n_fun <- function(x){
  return(data.frame(y = median(x)*1.15, label = paste0('n= ',length(x))))
}

pgs_allele <- exp.data %>%
  filter(group=='proto-gene') %>%
  dplyr::select(`Allele Gene name`) %>%
  pull()


thr <- -0.2
allnet <- graph_from_data_frame(net_df_significant[,c(2,4,6)],directed=FALSE)
adj <- as_adjacency_matrix(allnet,type='both',attr="Genetic interaction score (ε)") %>% as.matrix()
adj_nonw <- as_adjacency_matrix(allnet,type='both') %>% as.matrix()
net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= thr)#& `Double mutant fitness standard deviation` <= 0.1)
allnet_sl <- graph_from_data_frame(net_df_significant_sl[,c(2,4)],directed=FALSE)#%>% igraph::simplify()
colname <- colnames(nones.exp.data)[2]


exp.data <- exp.data %>%
  add.degree(allnet_sl, colname) %>%
  dplyr::mutate(int_density = deg / num)
exp.data$essint <- NA
exp.data$nonesint <- NA
exp.data$esscount <- NA
exp.data$nonescount <- NA
for( i in seq_along(exp.data$`Allele Gene name`)){
  name <- exp.data$`Allele Gene name`[i]
  if(name%in%colnames(adj)){
    interactions <- adj[name,]
    int_nonw <- adj_nonw[name,]
    lethal_int_name <- names(interactions[interactions<=thr])
    exp.data$essint[i] <- sum(lethal_int_name%in%exp.data$`Allele Gene name`[exp.data$maincat=='essential'])
    exp.data$nonesint[i] <- sum(lethal_int_name%in%exp.data$`Allele Gene name`[exp.data$maincat=='nonessential'])
    #names(int_nonw[int_nonw==1])
    exp.data$esscount[i] <- sum(names(int_nonw[int_nonw==1])%in%exp.data$`Allele Gene name`[exp.data$maincat=='essential'])
    exp.data$nonescount[i] <- sum(names(int_nonw[int_nonw==1])%in%exp.data$`Allele Gene name`[exp.data$maincat=='nonessential'])
  }

}

exp.data <- exp.data %>%
  dplyr::mutate(essint_density = essint / esscount, nonesint_density = nonesint / nonescount, cat=ifelse(is.na(cat),'essential',cat))

pe12 = coin::independence_test(group~essint,exp.data %>% mutate(group = as.factor(group)) %>% filter(group!='essential'))
pn12 = coin::independence_test(group~nonesint,exp.data %>% mutate(group = as.factor(group)) %>% filter(group!='essential'))
#1.028e-05 nones~pgs for nones ints
#0.0165 nones~pgs for ess ints
#p-value = 0.00124 nones~pgs for nones ints
#p-value = 0.01571 nones~pgs for ess ints
exp.data$group <- factor(exp.data$group,levels = c('proto-gene','nonessential','essential'))
exp.data %>% select(group,essint_density,nonesint_density) %>% gather(type,int_density,-group) %>%
  ggplot(aes(x=type,y=int_density,fill=group))+#geom_boxplot()
  stat_summary(fun.y = mean, geom = "bar",position = position_dodge(width=1)) +
  stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1),width=0.5)+ylab('Interaction density')+xlab("")+
  scale_x_discrete(labels=c('Essential\nInteractions','Nonessential\nInteractions'))+
  theme_me_bar +
  scale_fill_manual(labels=c('Proto-genes','Nonessential\nGenes','Essential\nGenes'),values = c( "#1CBDC2", "#EF3D23","#FAA51A"))+
  geom_signif(
    annotations = c(paste0('p= ',formatC(coin::pvalue(pe12), digits = 2)), paste0('p= ',formatC(coin::pvalue(pn12), digits = 1))),
    y_position = c(0.07,0.05), xmin = c(0.65, 1.65), xmax = c(1, 2),tip_length = 0.005,textsize = 1
  ) +theme(legend.position = 'right')#stat_summary(fun.data = n_fun, geom = "text",position=position_dodge(width=1))+
ggsave('figures/Figure2C.pdf',width = 3,height = 2)
oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.