This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

set.seed(1)
library(tidyverse)
library(igraph)
library(ggsignif)
library(cowplot)
source("R/pgs.subnetwork.R")
source("R/add.degree.R")
source("R/sample.nones.R")
source("R/plot_data.R")
pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv")
#pgs <- read_csv("~/Downloads/omersyntenyfilter.csv")
overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
net.df <- readRDS("analysis/data/derived_data/SGA_data_combined.rds.gz")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
net_df_significant <- filter(net.df, `P-value` <= 0.05 & `Query Strain ID` %in% overlappingorfs$orf_name == FALSE & `Array Strain ID` %in% overlappingorfs$orf_name == FALSE)

net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.2)#& `Double mutant fitness standard deviation`<=0.1)

allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs$orf_name,'proto-gene',maincat))

try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,]
lengths <- lapply(try$different.alleles, length)
try.multiple <- try[lengths > 1, ]
try.single <- try[lengths == 1, ]
ess.names <- exp.data$`Systematic gene name`[exp.data$maincat=='essential']
sim_n=10
rand_degree <- vector("list",sim_n)
rand_betweenness <- vector("list",sim_n)
rand_trans <- vector("list",sim_n)
pgs_degree <- vector("list",sim_n)
pgs_betweenness <- vector("list",sim_n)
pgs_trans <- vector("list",sim_n)
colname <- colnames(nones.exp.data)[1]
pgs_ <- data.frame(orf_name=sample.nones(pgs$orf_name,nones.exp.data,colname))
for (i in 1:sim_n) {
  allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))
  net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list)

  net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)])
  allnet <- graph_from_edgelist(net_edgelist, directed = F)
  allnet <- simplify(allnet)
  # nones.exp.data <- nones.exp.data %>%
  #   add.degree(allnet, colname) %>%
  #   mutate(int_density = deg / num)
  #
  # exp.number.data.filtered <- exp.number.data %>%
  #   add.degree(allnet, colname) %>%
  #   mutate(int_density = deg / num) %>%
  #   filter(Allele.Gene.name %in% allnw.list)


  allnet.deg <- degree(allnet)
  allnet.btw <- betweenness(allnet, normalized = T)
  #allnet.eig <- eigen_centrality(allnet)$vector
allnet.trans <- transitivity(allnet,'local')
  names(allnet.trans) <- names(allnet.deg)
  #nn <- pgs.subnetwork(allnet, pgs$orf_name)

  for (t in 1:100) {
    allnet_re <- rewire(allnet, with = keeping_degseq(niter = 10*vcount(allnet)))
    allnet.deg.rnd <- degree(allnet_re)
    allnet.btw.rnd <- betweenness(allnet_re,normalized = T)
    allnet.trans.rnd <- transitivity(allnet_re,'local')
    names(allnet.trans.rnd) <- V(allnet_re)$name

    pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs$orf_name])
    pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs$orf_name])
    pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs$orf_name])
    rand_degree[[i]][t] <- list(allnet.deg.rnd[names(allnet.deg.rnd)%in%pgs$orf_name])
    rand_betweenness[[i]][t] <- list(allnet.btw.rnd[names(allnet.btw.rnd)%in%pgs$orf_name])
    rand_trans[[i]][t] <- list(allnet.trans.rnd[names(allnet.trans.rnd)%in%pgs$orf_name])
  }
}


rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_degree <- unlist(rand_degree)

rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_betweenness <- unlist(rand_betweenness)

rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_trans <- unlist(rand_trans)

pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_degree <- unlist(pgs_degree)

pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_betweenness <- unlist(pgs_betweenness)

pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_trans <- unlist(pgs_trans)

boxplot(pgs_degree %>%  map(mean,na.rm=T) %>% unlist(),map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist())
boxplot(pgs_betweenness %>%  map(mean,na.rm=T) %>% unlist(),map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist())
boxplot(pgs_trans %>%  map(mean,na.rm=T) %>% unlist(),map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist())

plot_data(pgs_degree_mean,rand_degree_mean,rand_degree_mean,pgs_degree,rand_degree,rand_degree,title="Degree")
plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_betweenness_mean,pgs_betweenness,rand_betweenness,rand_betweenness,title="Betweenness",binwidth = 0.0001,xlim = 0)
plot_data(pgs_trans_mean,rand_trans_mean,rand_trans_mean,pgs_trans,rand_trans,rand_trans,title="Transitivity",xlim=0,binwidth = 0.01)

plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_betweenness,title="Betwenness")
plot_data_ecdf(pgs_trans,rand_trans,rand_trans,title = "Transitivity")
sim_n=100
rand_degree <- vector("list",sim_n)
rand_betweenness <- vector("list",sim_n)
rand_trans <- vector("list",sim_n)
pgs_degree <- vector("list",sim_n)
pgs_betweenness <- vector("list",sim_n)
pgs_trans <- vector("list",sim_n)
colname <- colnames(nones.exp.data)[1]
for (i in 1:sim_n) {
  allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))
  net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list)
pgs_ <- data.frame(orf_name=sample.nones(pgs$orf_name,nones.exp.data,colname))

  net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)])
  allnet <- graph_from_edgelist(net_edgelist, directed = F)
  allnet <- simplify(allnet)
  # nones.exp.data <- nones.exp.data %>%
  #   add.degree(allnet, colname) %>%
  #   mutate(int_density = deg / num)
  #
  # exp.number.data.filtered <- exp.number.data %>%
  #   add.degree(allnet, colname) %>%
  #   mutate(int_density = deg / num) %>%
  #   filter(Allele.Gene.name %in% allnw.list)


  allnet.deg <- degree(allnet)
  allnet.btw <- betweenness(allnet, normalized = T)
  #allnet.eig <- eigen_centrality(allnet)$vector
allnet.trans <- transitivity(allnet,'local')
  names(allnet.trans) <- names(allnet.deg)
  #nn <- pgs.subnetwork(allnet, pgs$orf_name)

  for (t in 1:10) {
    allnet_re <- rewire(allnet, with = keeping_degseq(niter = 10*vcount(allnet)))
    allnet.deg.rnd <- degree(allnet_re)
    allnet.btw.rnd <- betweenness(allnet_re,normalized = T)
    allnet.trans.rnd <- transitivity(allnet_re,'local')
    names(allnet.trans.rnd) <- V(allnet_re)$name

    pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_$orf_name])
    pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_$orf_name])
    pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_$orf_name])
    rand_degree[[i]][t] <- list(allnet.deg.rnd[names(allnet.deg.rnd)%in%pgs_$orf_name])
    rand_betweenness[[i]][t] <- list(allnet.btw.rnd[names(allnet.btw.rnd)%in%pgs_$orf_name])
    rand_trans[[i]][t] <- list(allnet.trans.rnd[names(allnet.trans.rnd)%in%pgs_$orf_name])
  }
}


rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_degree <- unlist(rand_degree)

rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_betweenness <- unlist(rand_betweenness)

rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_trans <- unlist(rand_trans)

pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_degree <- unlist(pgs_degree)

pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_betweenness <- unlist(pgs_betweenness)

pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_trans <- unlist(pgs_trans)


plot_data(pgs_degree_mean,rand_degree_mean,rand_degree_mean,pgs_degree,rand_degree,rand_degree,title="Degree")
plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_betweenness_mean,pgs_betweenness,rand_betweenness,rand_betweenness,title="Betweenness",binwidth = 0.0001,xlim = 0)
plot_data(pgs_trans_mean,rand_trans_mean,rand_trans_mean,pgs_trans,rand_trans,rand_trans,title="Transitivity",xlim=0,binwidth = 0.01)

plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_betweenness,title="Betwenness")
plot_data_ecdf(pgs_trans,rand_trans,rand_trans,title = "Transitivity")


# checkRand <- function(allnet, allnet_re){
#   el_allnet_re=get.edgelist(allnet_re) %>% as.data.frame %>% group_by(V1,V2) %>% arrange(V1,V2)
#   el_allnet=get.edgelist(allnet) %>% as.data.frame %>% group_by(V1,V2) %>% arrange(V1,V2)
#   elcheck=paste0(el_allnet$V1,el_allnet$V2)%in%paste0(el_allnet_re$V1,el_allnet_re$V2)
#   sum(elcheck)/length(elcheck)
# 
# 
# }
rand_giant_nodes <- c()
rand_giant_edges <- c()
rand_giant_nodes_es <- c()
rand_giant_edges_es <- c()
giant_nodes <- c()
giant_edges <- c()
colname <- colnames(nones.exp.data)[1]
for (i in 1:20) {
  allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))
  net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list)

  net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)])

  pgs_allele <- strain_ids %>%
    filter(`Systematic gene name` %in% pgs) %>%
    select(`Allele Gene name`) %>%
    pull()
  allnet <- graph_from_edgelist(net_edgelist, directed = F)
  allnet <- simplify(allnet)
  nones.exp.data <- nones.exp.data %>%
    add.degree(allnet, colname) %>%
    dplyr::mutate(int_density = deg / num)

  exp.number.data.filtered <- exp.data %>%
    add.degree(allnet, colname) %>%
    mutate(int_density = deg / num) %>%
    filter(`Allele Gene name` %in% allnw.list)


  allnet.deg <- degree(allnet)
  allnet.btw <- betweenness(allnet, normalized = T)
  allnet.eig <- eigen_centrality(allnet)$vector


  nn <- pgs.subnetwork(allnet, pgs$orf_name)
  giant_comp_nn <- induced_subgraph(nn, V(nn)[components(nn)$membership == which.max(components(nn)$csize)])
  # giant_nodes <- append(giant_nodes,vcount(giant_comp_nn))
  # giant_edges <- append(giant_edges,ecount(giant_comp_nn))
  for (t in 1:100) {
    giant_nodes <- append(giant_nodes, vcount(giant_comp_nn))
    giant_edges <- append(giant_edges, ecount(giant_comp_nn))
    s <- sample.nones(pgs$orf_name[pgs$orf_name %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=T)
    nn_re <- pgs.subnetwork(allnet, s)
    giant_comp_nn_re <- induced_subgraph(nn_re, V(nn_re)[components(nn_re)$membership == which.max(components(nn_re)$csize)])
    rand_giant_nodes <- append(rand_giant_nodes, vcount(giant_comp_nn_re))
    rand_giant_edges <- append(rand_giant_edges, ecount(giant_comp_nn_re))
    ess.s <-  sample.nones(pgs$orf_name[pgs$orf_name %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%ess.names)|(!!sym(colname)%in%pgs$orf_name[pgs$orf_name %in% V(allnet)$name])),colname,array=FALSE,dist=T)

      #sample(V(allnet)$name[V(allnet)$name %in% ess.names$X1], length(pgs$orf_name[pgs$orf_name %in% V(allnet)$name]))
    nn_es <- pgs.subnetwork(allnet, ess.s)
    giant_comp_nn_es <- induced_subgraph(nn_es, V(nn_es)[components(nn_es)$membership == which.max(components(nn_es)$csize)])
    rand_giant_nodes_es <- append(rand_giant_nodes_es, vcount(giant_comp_nn_es))
    rand_giant_edges_es <- append(rand_giant_edges_es, ecount(giant_comp_nn_es))
  }
}

# boxplot(giant_nodes,rand_giant_nodes,rand_giant_nodes_es)
# boxplot(giant_edges,rand_giant_edges,rand_giant_edges_es)
giant_comp_df <- bind_rows(data.frame(data = giant_nodes, group = "proto-gene"), data.frame(data = rand_giant_nodes, group = "nonessential"), data.frame(data = rand_giant_nodes_es, group = "essential"))
giant_comp_df <- bind_rows(data.frame(data = giant_edges, group = "proto-gene"), data.frame(data = rand_giant_edges, group = "nonessential"), data.frame(data = rand_giant_edges_es, group = "essential"))
giant_comp_df$group <- factor(giant_comp_df$group, levels = c("proto-gene", "nonessential", "essential"))
# ggplot(giant_comp_df,aes(x=group,y=data,fill=group))+geom_boxplot()
p12 <- 2 * sum(abs(giant_comp_df$data[giant_comp_df$group == "proto-gene"]) - abs(giant_comp_df$data[giant_comp_df$group == "nonessential"]) >= 0) / length(giant_comp_df$data[giant_comp_df$group == "proto-gene"])
p13 <- 2 * sum(abs(giant_comp_df$data[giant_comp_df$group == "proto-gene"]) - abs(giant_comp_df$data[giant_comp_df$group == "essential"]) >= 0) / length(giant_comp_df$data[giant_comp_df$group == "proto-gene"])
p23 <- 2 * sum(abs(giant_comp_df$data[giant_comp_df$group == "nonessential"]) - abs(giant_comp_df$data[giant_comp_df$group == "essential"]) >= 0) / length(giant_comp_df$data[giant_comp_df$group == "proto-gene"])

# function for number of observations 
give.n <- function(x){
  return(c(y = median(x)*1.05, label = length(x))) 
  # experiment with the multiplier to find the perfect position
}

ggplot(giant_comp_df, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot(outlier.shape = NA) + scale_x_discrete(labels = c("Proto-genes", "Non-essential Genes", "Essential Genes")) + theme_bw() +
  ylab("Giant component size") + xlab("") + theme(
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    panel.background = element_blank(), legend.position = "none", axis.title.y = element_text(size = 25), axis.text.x = element_text(size = 16)
  ) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange"))# +
ggsave('analysis/figures/0816_paper/giantcompsize_withdegedges.pdf')  
geom_signif(
    annotations = c("ns", "ns", "ns"),
    y_position = c(235, 250, 240), xmin = c(1, 1, 2), xmax = c(2, 3, 3)) #+ggtitle(itr)
# plotBoxSign(giant_comp_df)
source('R/dist.sample.R')
sim_n=10
essentials <- c(net_df_significant$`Query Strain ID`[net_df_significant$data_source=='ExE'],net_df_significant$`Array Strain ID`[net_df_significant$data_source=='ExE']) %>% unique()#net.df%>%filter(data_source=='ExE')
rand_degree <- vector("list",sim_n)
rand_betweenness <- vector("list",sim_n)
rand_trans <- vector("list",sim_n)
rand_ess_degree <- vector("list",sim_n)
rand_ess_betweenness <- vector("list",sim_n)
rand_ess_trans <- vector("list",sim_n)
pgs_degree <- vector("list",sim_n)
pgs_betweenness <- vector("list",sim_n)
pgs_trans <- vector("list",sim_n)
colname <- colnames(nones.exp.data)[1]
pgs_ <- pgs#[pgs$orf_name%in%nones.exp.data$`Systematic gene name`[nones.exp.data$cat=='nxes.only'],]
for (i in 1:sim_n) {
  allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))
  net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list)

  net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)])

  pgs_allele <- strain_ids %>%
    filter(`Systematic gene name` %in% pgs) %>%
    select(`Allele Gene name`) %>%
    pull()
  allnet <- graph_from_edgelist(net_edgelist, directed = F)
  allnet <- simplify(allnet)
  nones.exp.data <- nones.exp.data %>%
    add.degree(allnet, colname) %>%
    dplyr::mutate(int_density = deg / num)

  exp.number.data.filtered <- exp.data %>%
      add.degree(allnet, colname) %>%
      mutate(int_density = deg / num) %>%
      filter(`Allele Gene name` %in% allnw.list)


  allnet.deg <- degree(allnet)
  allnet.btw <- betweenness(allnet, normalized = T,weights = NA)
 # allnet.eig <- eigen_centrality(allnet)$vector
  allnet.trans <- transitivity(allnet,'local')
  names(allnet.trans) <- names(allnet.deg)
  # nn<-pgs.subnetwork(allnet, pgs$orf_name)
  # giant_comp_nn <- induced_subgraph(nn,V(nn)[components(nn)$membership==which.max(components(nn)$csize)])
  # giant_nodes <- append(giant_nodes,vcount(giant_comp_nn))
  # giant_edges <- append(giant_edges,ecount(giant_comp_nn))
  for (t in 1:100) {
    #cat=='nxes.only'&
    s <- sample.nones(pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=F,array=T)
    s.dist <- sample.nones(pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=T)
    ess.s <-  sample.nones(pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%ess.names)|(!!sym(colname)%in%pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name])),colname,array=FALSE,dist=F)
    ess.s.dist <- sample.nones(pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%ess.names)|(!!sym(colname)%in%pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name])),colname,array=FALSE,dist=T)

    rand_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%s])
    rand_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%s.dist])
    rand_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%s.dist])
    pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_$orf_name])
    pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_$orf_name])
    pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_$orf_name])
    rand_ess_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%ess.s])
    rand_ess_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%ess.s.dist])
    rand_ess_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%ess.s.dist])

  }

}

rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_degree <- unlist(rand_degree)

rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_betweenness <- unlist(rand_betweenness)

rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_trans <- unlist(rand_trans)

rand_ess_degree_mean <- map(rand_ess_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_ess_degree <- unlist(rand_ess_degree)

rand_ess_betweenness_mean <- map(rand_ess_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_ess_betweenness <- unlist(rand_ess_betweenness)

rand_ess_trans_mean <- map(rand_ess_trans,function(x) map(x,mean,na.rm=T)) %>% unlist()
rand_ess_trans <- unlist(rand_ess_trans)

pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_degree <- unlist(pgs_degree)

pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_betweenness <- unlist(pgs_betweenness)

pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_trans <- unlist(pgs_trans)
d1=plot_data(pgs_degree_mean,rand_degree_mean,rand_ess_degree_mean,pgs_degree,rand_degree,rand_ess_degree,title = 'degree')[[1]]
d2=plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_ess_betweenness_mean,pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.0001,xlim=0,title = 'betweenness')[[1]]
d3=plot_data(pgs_trans_mean,rand_trans_mean,rand_ess_trans_mean,pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'transitivity')[[1]]

plot_grid(d1,d2,d3,ncol=3)






d1=plot_data_ecdf(pgs_degree,rand_degree,rand_ess_degree,title = 'Degree',binwidth=2,logscale=T)
d2=plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.001,xlim=0,title = 'Betweenness',logscale=F)
d3=plot_data_ecdf(pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'Transitivity',logscale=F)
#plot_data_ecdf(nones.exp.data$deg[nones.exp.data$`Systematic gene name`%in%pgs$orf_name],nones.exp.data$deg[nones.exp.data$`Systematic gene name`%in%pgs$orf_name==F],nones.exp.data$deg[nones.exp.data$`Systematic gene name`%in%pgs$orf_name==F],title='Interaction density')
# 
pdf('analysis/figures/0816_paper/int_data_cdf_0.2nosd.pdf',width = 7,height = 3)
 plot_grid(d1,d2,d3,ncol=3)
dev.off()

png('analysis/figures/0805_paper/int_data_cdf_0.2nosd.png',width = 2000,height = 1200)
 plot_grid(d1,d2,d3,ncol=3)
dev.off()
library(plyr)
load("~/Main/anne/network_analysis/0409/intData.Rdata")
load("~/Main/anne/network_analysis/0409/pccData.Rdata")
load("~/Main/anne/network_analysis/0530essential/int_essential.RData")
load("~/Main/anne/network_analysis/0530essential/pcc_essential.RData")

# dff.int$group <- revalue(dff.int$group, c("neighborhood"="nodeg", "neighborhood with same distribution"="simulation"))

sm.list.int$type <- revalue(sm.list.int$type, c("actual" = "Proto-gene", "simulation" = "Non-essential"))
sm.list.pcc$type <- revalue(sm.list.pcc$type, c("actual" = "Proto-gene", "simulation" = "Non-essential"))

sm.list.nodeg.pcc.mean <- readRDS("~/Main/anne/network_analysis/0409/sm.list.nodeg.pcc.mean.rds")
sm.list.nodeg.pcc <- readRDS("~/Main/anne/network_analysis/0409/sm.list.nodeg.pcc.rds")
sm.list.nodeg.int$type <- revalue(sm.list.nodeg.int$type, c("actual" = "Proto-gene", "simulation" = "Non-essential"))
sm.list.nodeg.pcc$type <- revalue(sm.list.nodeg.pcc$type, c("actual" = "Proto-gene", "simulation" = "Non-essential"))

d1=plot_data(sm.list.int.mean$nn.mean,sm.list.int.mean$nones.mean,int_essentialmeandf$degree,sm.list.int$degree[sm.list.int$group=='proto-gene'&sm.list.int$type=='Proto-gene'],sm.list.int$degree[sm.list.int$group=='proto-gene'&sm.list.int$type=='Non-essential'],int_essentialdf$degree[int_essentialdf$group=='proto-gene'&int_essentialdf$type=='essential'],title = 'degree')[[1]]
d2=plot_data(sm.list.int.mean$nn.btw.mean,sm.list.int.mean$nones.btw.mean,int_essentialmeandf$betweenness,sm.list.int$betweenness[sm.list.int$group=='proto-gene'&sm.list.int$type=='Proto-gene'],sm.list.int$betweenness[sm.list.int$group=='proto-gene'&sm.list.int$type=='Non-essential'],int_essentialdf$betweenness[int_essentialdf$group=='proto-gene'&int_essentialdf$type=='essential'],binwidth = 0.0001,xlim=0,title = 'betweenness')[[1]]
d3=plot_data(pgs_trans_mean,rand_trans_mean,rand_ess_trans_mean,pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'transitivity')[[1]]

plot_grid(d1,d2,d3,ncol=3)
hist(allnet.deg[s],breaks=-1:max(allnet.deg[s]))
hist(allnet.deg[names(allnet.deg)%in%pgs$orf_name],breaks=-1:max(allnet.deg[names(allnet.deg)%in%pgs$orf_name]))

nones.exp.data[nones.exp.data$`Systematic gene name`%in%(allnet.deg[s][allnet.deg[s]>30] %>% names()),]
nones.exp.data %>% filter(`Systematic gene name` %in% pgs$orf_name&(cat=='nq.nxes')&bin>80)
nones.exp.data %>% filter(`Systematic gene name` %in% pgs$orf_name&(cat=='na.only')&bin>80)

nones.exp.data %>% filter((cat=='nq.nxes')&bin>80&deg>30) 
nones.exp.data %>% filter((cat=='na.only')&bin>80&deg>30) 

boxplot((nones.exp.data %>% filter((cat=='na.only')&bin>80) %>% select(deg) %>% pull()),(nones.exp.data %>% filter(`Systematic gene name` %in% pgs$orf_name&(cat=='nq.nxes')&bin>80) %>% select(deg) %>% pull()))


dd=map(nones.exp.data$cat %>% unique(), function(x) nones.exp.data %>% filter(cat==x) %>%  ggplot(aes(num))+geom_histogram(binwidth = 1)+geom_hline(yintercept = 1,color='red')+geom_hline(yintercept = 2,color='red')+ggtitle(x))
net_df_significant <- net_df_significant %>% mutate(group=ifelse(`Query Strain ID`%in%pgs$orf_name|`Array Strain ID`%in%pgs$orf_name,
                                           'Proto-gene',
                                           ifelse(`Query Strain ID`%in%essentials|`Array Strain ID`%in%essentials,'Essential','Non-essential')))

ggplot(net_df_significant %>% filter(`Genetic interaction score (ε)`<=-0.08),aes(x=group,y=`Double mutant fitness standard deviation`))+#geom_boxplot()
  stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) +
  stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1))

ggplot(net_df_significant,aes(x=`Genetic interaction score (ε)`,y=`Double mutant fitness standard deviation`,color=group))+geom_point()+ylim(0,NA)+xlim(NA,-0.08)
#ggplot(comb_nw_data, aes(x=type,y=data,color=group,fill=group,group = as.factor(group))) +`Double mutant fitness standard deviation`<0.1
rand_degree <- c()
nn_degree <- c()
colname <- colnames(nones.exp.data)[1]
for (i in 1:10) {
  allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))
  net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list)

  net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)])

  pgs_allele <- strain_ids %>%
    filter(`Systematic gene name` %in% pgs) %>%
    select(`Allele Gene name`) %>%
    pull()
  allnet <- graph_from_edgelist(net_edgelist, directed = F)
  allnet <- simplify(allnet)
  nones.exp.data <- nones.exp.data %>%
    add.degree(allnet, colname) %>%
    dplyr::mutate(int_density = deg / num)

  # exp.number.data.filtered <- exp.number.data %>%
  #   add.degree(allnet, colname) %>%
  #   mutate(int_density = deg / num) %>%
  #   filter(Allele.Gene.name %in% allnw.list)


  allnet.deg <- degree(allnet)
  allnet.btw <- betweenness(allnet, normalized = T)
  allnet.eig <- eigen_centrality(allnet)$vector

  for (t in 1:100) {
    s <- sample.nones(pgs$orf_name[pgs$orf_name %in% V(allnet)$name], nones.exp.data, colname)
    nn_degree <- append(nn_degree, mean(allnet.deg[names(allnet.deg) %in% pgs$orf_name]))

    rand_degree <- append(rand_degree, mean(allnet.deg[names(allnet.deg) %in% s]))
  }
}

sum(nn_degree > rand_degree) / length(nn_degree)
# boxplot(giant_edges,rand_giant_edges)
pgs_eps <- net_df_significant %>%
  filter(`Query Strain ID` %in% pgs$orf_name | `Array Strain ID` %in% pgs$orf_name) %>%
  mutate(data = `Genetic interaction score (ε)`, group = "proto-genes")
nones_eps <- net_df_significant %>%
  filter((`Query Strain ID` %in% pgs$orf_name | `Array Strain ID` %in% pgs$orf_name) == F &
    (`Query Strain ID` %in% nones.exp.data$`Systematic gene name` | `Array Strain ID` %in% nones.exp.data$`Systematic gene name`)) %>%
  mutate(data = `Genetic interaction score (ε)`, group = "Nonessential")
ess_eps <- net_df_significant %>%
  filter((`Query Strain ID` %in% pgs$orf_name | `Array Strain ID` %in% pgs$orf_name) == F &
    (`Query Strain ID` %in% ess.names | `Array Strain ID` %in% ess.names)) %>%
  mutate(data = `Genetic interaction score (ε)`, group = "Essential")

eps_comb <- bind_rows(pgs_eps %>% select(data, group), nones_eps %>% select(data, group), ess_eps %>% select(data, group))

library(ggsignif)

# p12 <- pvalue(independence_test(data ~ group, data = (eps_comb %>% filter(group == "proto-genes" | group == "Nonessential"))))
# p13 <- pvalue(independence_test(data ~ group, data = (eps_comb %>% filter(group == "proto-genes" | group == "Essential"))))
# p23 <- pvalue(independence_test(data ~ group, data = (eps_comb %>% filter(group == "Essential" | group == "Nonessential"))))
ggplot(eps_comb %>% filter(data <= -0.2), aes(x = data, color = group, y = ..ndensity..)) + geom_freqpoly(binwidth = 0.05) + geom_vline(xintercept = -0.2, linetype = "dashed") + xlim(NA, -0.2) #+geom_vline(xintercept = 0.08,linetype="dashed")

ggplot(eps_comb %>% filter(data <= -0), aes(x = data, color = group, y = ..ndensity..)) + geom_freqpoly(binwidth = 0.05) + geom_vline(xintercept = -0.2, linetype = "dashed") + xlim(NA, -0) #+geom_vline(xintercept = 0.08,linetype="dashed")


+scale_x_discrete(labels = c("Proto-genes", "Non-essential Genes", "Essential Genes")) + theme_bw() +
  ylab("Shortest distance") + xlab("") + theme(
    panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    panel.background = element_blank(), legend.position = "none", axis.title.y = element_text(size = 25), axis.text.x = element_text(size = 16)
  ) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange")) # +
# geom_signif(
#   annotations = c(paste0("p=", formatC(p12, digits = 3)), paste0("p=", formatC(p13, digits = 3)), paste0("p=", formatC(p23, digits = 3))),
#   y_position = c(max(eps_comb$data) + 1, max(eps_comb$data) + 3, max(eps_comb$data) + 2), xmin = c(1, 1, 2), xmax = c(2, 3, 3)
# )
net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.2)# & `Double mutant fitness standard deviation`<=0.1)

net=graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed = F)
pgs.subnetwork(net,pgs$orf_name)  %>% write_graph('analysis/figures/0708pres/int_.2_nosd.graphml',format='graphml')

#plot(net %>% induced_subgraph(neighborhood(.,order = 1,nodes = 'yer175w-a')[[1]]))
#neighborhood(net,order = 1,nodes = 'YER175W-A')
library(RCy3)
net=graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed = F) %>% simplify()

pgs_sub <- pgs.subnetwork(net,pgs$orf_name)
induced_subgraph(pgs_sub,names(components(pgs_sub)$membership[components(pgs_sub)$membership==1]))%>% createNetworkFromIgraph('int_pgs')
setNodeSizeMapping('cat',c('gene','proto-gene'),sizes=c(30,60),mapping.type = 'd')
layoutNetwork('kamada-kawai')# defaultSpringLength=100 defaultSpringCoefficient=0.0000003')
setEdgeColorDefault('#000000')
setBackgroundColorDefault('#ffffff')
setEdgeOpacityDefault(50)

fitContent()
exportImage("analysis/figures/0812_poster/pgs_sub_int", "PDF",resolution=300, height = 5, width = 5,units='inches')


oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.