This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

set.seed(1)
library(tidyverse)
library(igraph)
library(ggsignif)
library(cowplot)
library(data.table)
source("R/pgs.subnetwork.R")
source("R/add.degree.R")
source("R/sample.nones.R")
source("R/plot_data.R")
getwd()
#set.seed(1)
pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv")
#read_csv("~/Main/pgsNetwork/data/0701_proto-genes_list")
#pgs <- read_csv("~/Downloads/omersyntenyfilter.csv")
overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
costanzo=T
if(costanzo==F){
  #read my pcc values

ndund1='analysis/data/derived_data/pcc_calculations_rcpp/nodelist_und1'
und1pcc <- 'analysis/data/derived_data/pcc_calculations_rcpp/und12avgpcc'
ndavgnorm <- 'analysis/data/derived_data/pcc_calculations_rcpp/ndlistavgpcc_calc_normalized_matrix.csv'
avgnorm <- 'analysis/data/derived_data/pcc_calculations_rcpp/avgpcc_calc_normalized_matrix.csv'
pcc0703 <- 'analysis/data/derived_data/pcc_calculations_rcpp/pcc_calc_0703'
ndlist0703 <- 'analysis/data/derived_data/pcc_calculations_rcpp/nodelist_0703'
pccnorm <- 'analysis/data/derived_data/pcc_calculations/pcc_calc_normalized_matrix.csv'
ndlistpccnorm <- 'analysis/data/derived_data/pcc_calculations/ndlistpcc_calc_normalized_matrix.csv'
# ndlist <- fread(ndlist_fname,header=F)
# adj_res <- fread(adj_res_fname,header=F)
adj_res <- fread(pcc0703,header=F)
ndlist <- fread(ndlist0703,header=F)

adj_res <- as.matrix(adj_res)
colnames(adj_res) <- ndlist$V1
rownames(adj_res) <- ndlist$V1

adj_res_graph <- graph_from_adjacency_matrix(adj_res,weighted=TRUE,mode='undirected',diag=F)
g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp')|V(adj_res_graph)$name%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]))


g_final=delete.edges(g, which(E(g)$weight <.2|is.na(E(g)$weight)|is.nan(E(g)$weight)))

g_final <- delete.vertices(g_final,which(degree(g_final)<1 ))
pcc.net <- g_final#graph_from_data_frame(df.pcc,F)
#E(pcc.net)$weight <- df.pcc$pcc
nw.del<-igraph::simplify(pcc.net,remove.loops = T)
}else{
  costanzo_matrix_data <- fread('analysis/data/raw_data//pcc_ALL.txt',header=T)
  costanzo_matrix <- costanzo_matrix_data[-1,-c(1,2)]%>%as.matrix()
  rownames(costanzo_matrix) <- colnames(costanzo_matrix)
  class(costanzo_matrix) <- "numeric"
  adj_res_graph <- graph_from_adjacency_matrix(costanzo_matrix,weighted=TRUE,mode='undirected',diag=F)
  g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp')|V(adj_res_graph)$name%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]))#
  # load('~/Main/anne/network_analysis/df.pcc.Rdata')
  # df.pcc <- df.pcc %>% filter((pcc)>=0.2) %>% drop_na()

  g_final=delete.edges(g, which(E(g)$weight <.2|is.na(E(g)$weight)|is.nan(E(g)$weight)))

  g_final <- delete.vertices(g_final,which(degree(g_final)<1 ))
  pcc.net <- g_final#graph_from_data_frame(df.pcc,F)
  # E(pcc.net)$weight <- df.pcc$pcc
   nw.del<-igraph::simplify(pcc.net,remove.loops = T)
}



allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
exp.number.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs$orf_name,'proto-gene',maincat))
try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,]
lengths <- lapply(try$different.alleles, length)
try.multiple <- try[lengths > 1, ]
try.single <- try[lengths == 1, ]
ess.names <- read_csv("../anne/network_analysis/0530essential/ess_names.csv", col_names = F)
essentials <-  exp.number.data$`Allele Gene name`[exp.number.data$maincat=='essential']
#read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) %>% dplyr::select(X1) %>% pull()
sim_n=10
rand_degree <- vector("list",sim_n)
rand_betweenness <- vector("list",sim_n)
rand_trans <- vector("list",sim_n)
pgs_degree <- vector("list",sim_n)
pgs_betweenness <- vector("list",sim_n)
pgs_trans <- vector("list",sim_n)
colname <- colnames(nones.exp.data)[2]
pgs_allele <- strain_ids %>%
    filter(`Systematic gene name` %in% pgs$orf_name) %>%
    dplyr::select(`Allele Gene name`) %>%
    pull()
pgs_ <- sample.nones(pgs_allele,nones.exp.data,colname)
for (i in 1:sim_n) {

  allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))


  allnet <- induced.subgraph(nw.del, V(nw.del)[V(nw.del)$name%in%allnw.list]) %>% 
       delete.vertices(which(degree(.)==0))
  # nones.exp.data <- nones.exp.data %>%
  #   add.degree(allnet, colname) %>%
  #   mutate(int_density = deg / num)
  #
  # exp.number.data.filtered <- exp.number.data %>%
  #   add.degree(allnet, colname) %>%
  #   mutate(int_density = deg / num) %>%
  #   filter(Allele.Gene.name %in% allnw.list)


  allnet.deg <- degree(allnet)
  allnet.btw <- betweenness(allnet, normalized = T)
  #allnet.eig <- eigen_centrality(allnet)$vector
allnet.trans <- transitivity(allnet,'local')
  names(allnet.trans) <- names(allnet.deg)
  #nn <- pgs.subnetwork(allnet, pgs$orf_name)

  for (t in 1:100) {
    allnet_re <- rewire(allnet, with = keeping_degseq(niter = 10*vcount(allnet)))
    allnet.deg.rnd <- degree(allnet_re)
    allnet.btw.rnd <- betweenness(allnet_re,normalized = T)
    allnet.trans.rnd <- transitivity(allnet_re,'local')
    names(allnet.trans.rnd) <- V(allnet_re)$name

    pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_])
    pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_])
    pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_])
    rand_degree[[i]][t] <- list(allnet.deg.rnd[names(allnet.deg.rnd)%in%pgs_])
    rand_betweenness[[i]][t] <- list(allnet.btw.rnd[names(allnet.btw.rnd)%in%pgs_])
    rand_trans[[i]][t] <- list(allnet.trans.rnd[names(allnet.trans.rnd)%in%pgs_])
  }
}


rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_degree <- unlist(rand_degree)

rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_betweenness <- unlist(rand_betweenness)

rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_trans <- unlist(rand_trans)

pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_degree <- unlist(pgs_degree)

pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_betweenness <- unlist(pgs_betweenness)

pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_trans <- unlist(pgs_trans)


plot_data(pgs_degree_mean,rand_degree_mean,rand_degree_mean,pgs_degree,rand_degree,rand_degree,title="Degree")
plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_betweenness_mean,pgs_betweenness,rand_betweenness,rand_betweenness,title="Betweenness",binwidth = 0.0001,xlim = 0)
plot_data(pgs_trans_mean,rand_trans_mean,rand_trans_mean,pgs_trans,rand_trans,rand_trans,title="Transitivity",xlim=0,binwidth = 0.01)

plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_betweenness,title="Betwenness")
plot_data_ecdf(pgs_trans,rand_trans,rand_trans,title = "Transitivity")
source('R/dist.sample.R')
sim_n=10
rand_degree <- vector("list",sim_n)
rand_betweenness <- vector("list",sim_n)
rand_trans <- vector("list",sim_n)
rand_ess_degree <- vector("list",sim_n)
rand_ess_betweenness <- vector("list",sim_n)
rand_ess_trans <- vector("list",sim_n)
pgs_degree <- vector("list",sim_n)
pgs_betweenness <- vector("list",sim_n)
pgs_trans <- vector("list",sim_n)
colname <- colnames(nones.exp.data)[2]
for (i in 1:sim_n) {
  allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))

  pgs_allele <- strain_ids %>%
    filter(`Systematic gene name` %in% pgs$orf_name) %>%
    select(`Allele Gene name`) %>%
    pull()
  allnet <- induced.subgraph(nw.del, V(nw.del)[V(nw.del)$name%in%allnw.list]) %>% 
       delete.vertices(which(degree(.)==0))

  nones.exp.data <- nones.exp.data %>%
    add.degree(allnet, colname) %>%
    dplyr::mutate(int_density = deg / num)

  exp.number.data.filtered <- exp.number.data %>%
      add.degree(allnet, colname) %>%
      mutate(int_density = deg / num) %>%
      filter(`Allele Gene name` %in% allnw.list)


  allnet.deg <- degree(allnet)
  allnet.btw <- betweenness(allnet, weights = NA,normalized = T)
 # allnet.eig <- eigen_centrality(allnet)$vector
  allnet.trans <- transitivity(allnet,'local')
  names(allnet.trans) <- names(allnet.deg)
  # nn<-pgs.subnetwork(allnet, pgs$orf_name)
  # giant_comp_nn <- induced_subgraph(nn,V(nn)[components(nn)$membership==which.max(components(nn)$csize)])
  # giant_nodes <- append(giant_nodes,vcount(giant_comp_nn))
  # giant_edges <- append(giant_edges,ecount(giant_comp_nn))
  for (t in 1:100) {
    s <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=F)
    s.dist <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=T)
     ess.s <-  sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs_allele[pgs_allele%in% V(allnet)$name])),colname,array=FALSE,dist=F)
    ess.s.dist <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs_allele[pgs_allele %in% V(allnet)$name])),colname,array=FALSE,dist=T)

    rand_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%s])
    rand_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%s.dist])
    rand_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%s.dist])
    pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_allele])
    pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_allele])
    pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_allele])
    rand_ess_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%ess.s])
    rand_ess_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%ess.s.dist])
    rand_ess_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%ess.s.dist])

  }
  print(i)
}

rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_degree <- unlist(rand_degree)

rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_betweenness <- unlist(rand_betweenness)

rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_trans <- unlist(rand_trans)

rand_ess_degree_mean <- map(rand_ess_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_ess_degree <- unlist(rand_ess_degree)

rand_ess_betweenness_mean <- map(rand_ess_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_ess_betweenness <- unlist(rand_ess_betweenness)

rand_ess_trans_mean <- map(rand_ess_trans,function(x) map(x,mean,na.rm=T)) %>% unlist()
rand_ess_trans <- unlist(rand_ess_trans)

pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_degree <- unlist(pgs_degree)

pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_betweenness <- unlist(pgs_betweenness)

pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_trans <- unlist(pgs_trans)
d1=plot_data(pgs_degree_mean,rand_degree_mean,rand_ess_degree_mean,pgs_degree,rand_degree,rand_ess_degree,title = 'degree',binwidth=2)[[1]]
d2=plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_ess_betweenness_mean,pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.001,xlim=0,title = 'betweenness')[[1]]
d3=plot_data(pgs_trans_mean,rand_trans_mean,rand_ess_trans_mean,pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'transitivity')[[1]]

plot_grid(d1,d2,d3,ncol=3)
degreefullplot=bind_rows(data.frame(data=allnet.deg[names(allnet.deg)%in%pgs_allele],group='pgs'),data.frame(data=allnet.deg[names(allnet.deg)%in%nones.exp.data$`Allele Gene name`],group='nones'),data.frame(data=allnet.deg[names(allnet.deg)%in%essentials],group='ess')) %>%
  ggplot(aes(x=data,color=group))+geom_line(aes(y = 1 - ..y..), stat='ecdf')+scale_x_log10()+scale_y_log10()+ annotation_logticks()+ labs(color = "") + scale_color_manual(values = c("#00BFC4", "#FF3300", "orange")) + theme(legend.position = "none", axis.title.y = element_text(size = 30), axis.title.x = element_text(size = 30)) + # scale_fill_discrete(name='')+
    ylab("Percentage") + xlab('') + scale_y_continuous(labels = scales::percent)

btwfullplot=bind_rows(data.frame(data=allnet.btw[names(allnet.btw)%in%pgs_allele],group='pgs'),data.frame(data=allnet.btw[names(allnet.btw)%in%nones.exp.data$`Allele Gene name`],group='nones'),data.frame(data=allnet.btw[names(allnet.btw)%in%essentials],group='ess')) %>%
  ggplot(aes(x=data,color=group))+geom_line(aes(y = 1 - ..y..), stat='ecdf')+scale_x_log10()+scale_y_log10()+ annotation_logticks()+ labs(color = "") + scale_color_manual(values = c("#00BFC4", "#FF3300", "orange")) + theme(legend.position = "none", axis.title.y = element_text(size = 30), axis.title.x = element_text(size = 30)) + # scale_fill_discrete(name='')+
    ylab("Percentage") + xlab('') + scale_y_continuous(labels = scales::percent)

transfullplot=bind_rows(data.frame(data=allnet.trans[names(allnet.trans)%in%pgs_allele],group='pgs'),data.frame(data=allnet.trans[names(allnet.trans)%in%nones.exp.data$`Allele Gene name`],group='nones'),data.frame(data=allnet.trans[names(allnet.trans)%in%essentials],group='ess')) %>%
  ggplot(aes(x=data,color=group))+geom_line(aes(y = 1 - ..y..), stat='ecdf')+scale_x_log10()+scale_y_log10()+ annotation_logticks()+ labs(color = "") + scale_color_manual(values = c("#00BFC4", "#FF3300", "orange")) + theme(legend.position = "none", axis.title.y = element_text(size = 30), axis.title.x = element_text(size = 30)) + # scale_fill_discrete(name='')+
    ylab("Percentage") + xlab('') + scale_y_continuous(labels = scales::percent)

plot_grid(degreefullplot,btwfullplot,transfullplot,ncol=3)
#   






d1=plot_data_ecdf(pgs_degree,rand_degree,rand_ess_degree,title = 'Degree',binwidth=2,logscale = T)
d2=plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.001,xlim=0,title = 'Betweenness',logscale = F)
d3=plot_data_ecdf(pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'Transitivity',logscale = F)

plot_grid(d1,d2,d3,ncol=3)  


pdf('analysis/figures/0816_paper/pcc_data_cdf_cost_78.pdf',width = 7,height = 3)
plot_grid(d1,d2,d3,ncol=3)
dev.off()

png('analysis/figures/0805_paper/pcc_data_cdf_cost_78.png',width = 2000,height = 1200)
plot_grid(d1,d2,d3,ncol=3)
dev.off()
nw.edgelist <- gather(as.data.frame(adj_res) %>%rownames_to_column(),key='gene2',value='weight',-rowname) %>% filter(rowname!=gene2 & weight>=0.2 &str_detect(rowname,'supp')==F&rowname%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]==F&
                                                                                                                       str_detect(gene2,'supp')==F&gene2%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]==F) %>% drop_na()
nw <- graph_from_data_frame(nw.edgelist,directed = F) %>% igraph::simplify()
nw.tbl <- nw %>% get.edgelist() %>% as_tibble()
nw.tbl$weight <- E(nw)$weight
#write_delim(nw.tbl,'analysis/data/derived_data/mypcc_network.tsv',delim='\t',col_names=F)
nrow(nw.tbl)
#nw.edgelist#data.frame(gene1='a',gene2='b',weight=as.numeric(adj_res[lower.tri(adj_res)])) %>% drop_na() %>% filter(weight!=0)
#write_graph(nw.del,'~/Main/pgsNetwork/analysis/data/derived_data/mypcc_network.el',format = 'edgelist')
#net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.2)# & `Double mutant fitness standard deviation`<=0.1)
pgs_allele <- strain_ids %>%
    filter(`Systematic gene name` %in% pgs$orf_name) %>%
    select(`Allele Gene name`) %>%
    pull()
net=nw.del#graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed = F)
pgs.subnetwork(net,pgs_allele) # %>% write_graph('analysis/figures/0805_paper/pcc.graphml',format='graphml')
library(RCy3)
net=nw.del#graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed = F)

ybr <- neighborhood(net,order=1,nodes='ybr196c-a')
ybr_sub <- induced_subgraph(net,ybr[[1]]) 
createNetworkFromIgraph(ybr_sub,new.title='ybr')
layoutNetwork('force-directed defaultSpringLength=70 defaultSpringCoefficient=0.000003')#degree(net,'ybr196c-a')
RColorBrewer::brewer.pal(length(E(ybr_sub)$weight %>% unique()),'Paired')
setEdgeColorMapping('linkcom',colors=RColorBrewer::brewer.pal(length(E(ybr_sub)$weight %>% unique()),'Paired'),mapping.type = 'd')
#sample.nones('ybr196c-a',nones.exp.data %>% add.degree(net,colnames(nones.exp.data)[2]),colnames(nones.exp.data)[2],dist=T)

hri <- neighborhood(net,order=1,nodes='sec13-1')
sec_sub <- induced_subgraph(net,hri[[1]])


oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.