R/pccNetworkAnalysis.R

set.seed(1)
## Read Inputs files----------------
library(tidyverse)
library(igraph)
library(ggsignif)
library(cowplot)
library(data.table)
source("R/pgs.subnetwork.R")
source("R/add.degree.R")
source("R/sample.nones.R")
source("R/plot_data.R")
#source('R/dist.sample.R')

overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
costanzo_matrix_data <- fread('analysis/data/raw_data/pcc_ALL.txt',header=T)
costanzo_matrix <- costanzo_matrix_data[-1,-c(1,2)]%>%as.matrix()
rownames(costanzo_matrix) <- colnames(costanzo_matrix)
class(costanzo_matrix) <- "numeric"
adj_res_graph <- graph_from_adjacency_matrix(costanzo_matrix,weighted=TRUE,mode='undirected',diag=F)
g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp')|V(adj_res_graph)$name%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]))#

g_final=delete.edges(g, which(E(g)$weight <.2|is.na(E(g)$weight)|is.nan(E(g)$weight)))

g_final <- delete.vertices(g_final,which(degree(g_final)<1 ))
pcc.net <- g_final#graph_from_data_frame(df.pcc,F)
# E(pcc.net)$weight <- df.pcc$pcc
nw.del<-igraph::simplify(pcc.net,remove.loops = T)




allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
exp.number.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs$orf_name,'proto-gene',maincat))
try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,]
lengths <- lapply(try$different.alleles, length)
try.multiple <- try[lengths > 1, ]
try.single <- try[lengths == 1, ]
essentials <-  exp.number.data$`Allele Gene name`[exp.number.data$maincat=='essential']
#read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) %>% dplyr::select(X1) %>% pull()







## r random node selection degree-betweenness-trans-------------
sim_n=10
rand_degree <- vector("list",sim_n)
rand_betweenness <- vector("list",sim_n)
rand_trans <- vector("list",sim_n)
rand_ess_degree <- vector("list",sim_n)
rand_ess_betweenness <- vector("list",sim_n)
rand_ess_trans <- vector("list",sim_n)
pgs_degree <- vector("list",sim_n)
pgs_betweenness <- vector("list",sim_n)
pgs_trans <- vector("list",sim_n)
colname <- colnames(nones.exp.data)[2]
for (i in 1:sim_n) {
  allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))

  pgs_allele <- strain_ids %>%
    filter(`Systematic gene name` %in% pgs$orf_name) %>%
    select(`Allele Gene name`) %>%
    pull()
  allnet <- induced.subgraph(nw.del, V(nw.del)[V(nw.del)$name%in%allnw.list]) %>%
    delete.vertices(which(degree(.)==0))

  nones.exp.data <- nones.exp.data %>%
    add.degree(allnet, colname) %>%
    dplyr::mutate(int_density = deg / num)

  exp.number.data.filtered <- exp.number.data %>%
    add.degree(allnet, colname) %>%
    mutate(int_density = deg / num) %>%
    filter(`Allele Gene name` %in% allnw.list)


  allnet.deg <- degree(allnet)
  allnet.btw <- betweenness(allnet, weights = NA,normalized = T)
  allnet.trans <- transitivity(allnet,'local')
  names(allnet.trans) <- names(allnet.deg)

  for (t in 1:100) {
    s <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=F)
    s.dist <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=T)
    ess.s <-  sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs_allele[pgs_allele%in% V(allnet)$name])),colname,array=FALSE,dist=F)
    ess.s.dist <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs_allele[pgs_allele %in% V(allnet)$name])),colname,array=FALSE,dist=T)

    rand_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%s])
    rand_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%s.dist])
    rand_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%s.dist])
    pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_allele])
    pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_allele])
    pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_allele])
    rand_ess_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%ess.s])
    rand_ess_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%ess.s.dist])
    rand_ess_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%ess.s.dist])

  }
  print(i)
}

rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_degree <- unlist(rand_degree)

rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_betweenness <- unlist(rand_betweenness)

rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_trans <- unlist(rand_trans)

rand_ess_degree_mean <- map(rand_ess_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_ess_degree <- unlist(rand_ess_degree)

rand_ess_betweenness_mean <- map(rand_ess_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_ess_betweenness <- unlist(rand_ess_betweenness)

rand_ess_trans_mean <- map(rand_ess_trans,function(x) map(x,mean,na.rm=T)) %>% unlist()
rand_ess_trans <- unlist(rand_ess_trans)

pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_degree <- unlist(pgs_degree)

pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_betweenness <- unlist(pgs_betweenness)

pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_trans <- unlist(pgs_trans)


## r degree-betweenness-trans plots ecdf-----------
d1=plot_data_ecdf(pgs_degree,rand_degree,rand_ess_degree,title = 'Degree',binwidth=2,logscale = T)
d2=plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.001,xlim=0,title = 'Betweenness',logscale = T)
d3=plot_data_ecdf(pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'Transitivity',logscale = T)

legend = get_legend(d1+theme(legend.box = 'horizontal',legend.justification = 'center'))
prow = plot_grid(d1+theme(legend.position = 'none'),
          d2+theme(legend.position = 'none'),
          d3+theme(legend.position = 'none'),
          align = 'vh',
          hjust = -1,
          nrow = 1)

pdf('figures/Figure4E_G.pdf',width = 7,height = 3)
plot_grid(prow,legend,ncol=1,rel_heights = c(1,.1))
dev.off()

# png('analysis/figures/0805_paper/pcc_data_cdf_cost_78.png',width = 2000,height = 1200)
# plot_grid(d1,d2,d3,ncol=3)
# dev.off()



#degree p=0.349
#btw p = 0.271
#trans p = 0.152
oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.