This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
set.seed(1)
library(tidyverse) library(igraph) library(ggsignif) library(cowplot) library(data.table) source("R/pgs.subnetwork.R") source("R/add.degree.R") source("R/sample.nones.R") source("R/plot_data.R") getwd() #set.seed(1) pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv") #read_csv("~/Main/pgsNetwork/data/0701_proto-genes_list") #pgs <- read_csv("~/Downloads/omersyntenyfilter.csv") overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv") strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") costanzo=T if(costanzo==F){ #read my pcc values ndund1='analysis/data/derived_data/pcc_calculations_rcpp/nodelist_und1' und1pcc <- 'analysis/data/derived_data/pcc_calculations_rcpp/und12avgpcc' ndavgnorm <- 'analysis/data/derived_data/pcc_calculations_rcpp/ndlistavgpcc_calc_normalized_matrix.csv' avgnorm <- 'analysis/data/derived_data/pcc_calculations_rcpp/avgpcc_calc_normalized_matrix.csv' pcc0703 <- 'analysis/data/derived_data/pcc_calculations_rcpp/pcc_calc_0703' ndlist0703 <- 'analysis/data/derived_data/pcc_calculations_rcpp/nodelist_0703' pccnorm <- 'analysis/data/derived_data/pcc_calculations/pcc_calc_normalized_matrix.csv' ndlistpccnorm <- 'analysis/data/derived_data/pcc_calculations/ndlistpcc_calc_normalized_matrix.csv' # ndlist <- fread(ndlist_fname,header=F) # adj_res <- fread(adj_res_fname,header=F) adj_res <- fread(pcc0703,header=F) ndlist <- fread(ndlist0703,header=F) adj_res <- as.matrix(adj_res) colnames(adj_res) <- ndlist$V1 rownames(adj_res) <- ndlist$V1 adj_res_graph <- graph_from_adjacency_matrix(adj_res,weighted=TRUE,mode='undirected',diag=F) g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp')|V(adj_res_graph)$name%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name])) g_final=delete.edges(g, which(E(g)$weight <.2|is.na(E(g)$weight)|is.nan(E(g)$weight))) g_final <- delete.vertices(g_final,which(degree(g_final)<1 )) pcc.net <- g_final#graph_from_data_frame(df.pcc,F) #E(pcc.net)$weight <- df.pcc$pcc nw.del<-igraph::simplify(pcc.net,remove.loops = T) }else{ costanzo_matrix_data <- fread('analysis/data/raw_data//pcc_ALL.txt',header=T) costanzo_matrix <- costanzo_matrix_data[-1,-c(1,2)]%>%as.matrix() rownames(costanzo_matrix) <- colnames(costanzo_matrix) class(costanzo_matrix) <- "numeric" adj_res_graph <- graph_from_adjacency_matrix(costanzo_matrix,weighted=TRUE,mode='undirected',diag=F) g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp')|V(adj_res_graph)$name%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]))# # load('~/Main/anne/network_analysis/df.pcc.Rdata') # df.pcc <- df.pcc %>% filter((pcc)>=0.2) %>% drop_na() g_final=delete.edges(g, which(E(g)$weight <.2|is.na(E(g)$weight)|is.nan(E(g)$weight))) g_final <- delete.vertices(g_final,which(degree(g_final)<1 )) pcc.net <- g_final#graph_from_data_frame(df.pcc,F) # E(pcc.net)$weight <- df.pcc$pcc nw.del<-igraph::simplify(pcc.net,remove.loops = T) } allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds") nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") exp.number.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs$orf_name,'proto-gene',maincat)) try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,] lengths <- lapply(try$different.alleles, length) try.multiple <- try[lengths > 1, ] try.single <- try[lengths == 1, ] ess.names <- read_csv("../anne/network_analysis/0530essential/ess_names.csv", col_names = F) essentials <- exp.number.data$`Allele Gene name`[exp.number.data$maincat=='essential'] #read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) %>% dplyr::select(X1) %>% pull()
sim_n=10 rand_degree <- vector("list",sim_n) rand_betweenness <- vector("list",sim_n) rand_trans <- vector("list",sim_n) pgs_degree <- vector("list",sim_n) pgs_betweenness <- vector("list",sim_n) pgs_trans <- vector("list",sim_n) colname <- colnames(nones.exp.data)[2] pgs_allele <- strain_ids %>% filter(`Systematic gene name` %in% pgs$orf_name) %>% dplyr::select(`Allele Gene name`) %>% pull() pgs_ <- sample.nones(pgs_allele,nones.exp.data,colname) for (i in 1:sim_n) { allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1))) allnet <- induced.subgraph(nw.del, V(nw.del)[V(nw.del)$name%in%allnw.list]) %>% delete.vertices(which(degree(.)==0)) # nones.exp.data <- nones.exp.data %>% # add.degree(allnet, colname) %>% # mutate(int_density = deg / num) # # exp.number.data.filtered <- exp.number.data %>% # add.degree(allnet, colname) %>% # mutate(int_density = deg / num) %>% # filter(Allele.Gene.name %in% allnw.list) allnet.deg <- degree(allnet) allnet.btw <- betweenness(allnet, normalized = T) #allnet.eig <- eigen_centrality(allnet)$vector allnet.trans <- transitivity(allnet,'local') names(allnet.trans) <- names(allnet.deg) #nn <- pgs.subnetwork(allnet, pgs$orf_name) for (t in 1:100) { allnet_re <- rewire(allnet, with = keeping_degseq(niter = 10*vcount(allnet))) allnet.deg.rnd <- degree(allnet_re) allnet.btw.rnd <- betweenness(allnet_re,normalized = T) allnet.trans.rnd <- transitivity(allnet_re,'local') names(allnet.trans.rnd) <- V(allnet_re)$name pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_]) pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_]) pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_]) rand_degree[[i]][t] <- list(allnet.deg.rnd[names(allnet.deg.rnd)%in%pgs_]) rand_betweenness[[i]][t] <- list(allnet.btw.rnd[names(allnet.btw.rnd)%in%pgs_]) rand_trans[[i]][t] <- list(allnet.trans.rnd[names(allnet.trans.rnd)%in%pgs_]) } } rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_degree <- unlist(rand_degree) rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_betweenness <- unlist(rand_betweenness) rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_trans <- unlist(rand_trans) pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_degree <- unlist(pgs_degree) pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_betweenness <- unlist(pgs_betweenness) pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_trans <- unlist(pgs_trans) plot_data(pgs_degree_mean,rand_degree_mean,rand_degree_mean,pgs_degree,rand_degree,rand_degree,title="Degree") plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_betweenness_mean,pgs_betweenness,rand_betweenness,rand_betweenness,title="Betweenness",binwidth = 0.0001,xlim = 0) plot_data(pgs_trans_mean,rand_trans_mean,rand_trans_mean,pgs_trans,rand_trans,rand_trans,title="Transitivity",xlim=0,binwidth = 0.01) plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_betweenness,title="Betwenness") plot_data_ecdf(pgs_trans,rand_trans,rand_trans,title = "Transitivity")
source('R/dist.sample.R') sim_n=10 rand_degree <- vector("list",sim_n) rand_betweenness <- vector("list",sim_n) rand_trans <- vector("list",sim_n) rand_ess_degree <- vector("list",sim_n) rand_ess_betweenness <- vector("list",sim_n) rand_ess_trans <- vector("list",sim_n) pgs_degree <- vector("list",sim_n) pgs_betweenness <- vector("list",sim_n) pgs_trans <- vector("list",sim_n) colname <- colnames(nones.exp.data)[2] for (i in 1:sim_n) { allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1))) pgs_allele <- strain_ids %>% filter(`Systematic gene name` %in% pgs$orf_name) %>% select(`Allele Gene name`) %>% pull() allnet <- induced.subgraph(nw.del, V(nw.del)[V(nw.del)$name%in%allnw.list]) %>% delete.vertices(which(degree(.)==0)) nones.exp.data <- nones.exp.data %>% add.degree(allnet, colname) %>% dplyr::mutate(int_density = deg / num) exp.number.data.filtered <- exp.number.data %>% add.degree(allnet, colname) %>% mutate(int_density = deg / num) %>% filter(`Allele Gene name` %in% allnw.list) allnet.deg <- degree(allnet) allnet.btw <- betweenness(allnet, weights = NA,normalized = T) # allnet.eig <- eigen_centrality(allnet)$vector allnet.trans <- transitivity(allnet,'local') names(allnet.trans) <- names(allnet.deg) # nn<-pgs.subnetwork(allnet, pgs$orf_name) # giant_comp_nn <- induced_subgraph(nn,V(nn)[components(nn)$membership==which.max(components(nn)$csize)]) # giant_nodes <- append(giant_nodes,vcount(giant_comp_nn)) # giant_edges <- append(giant_edges,ecount(giant_comp_nn)) for (t in 1:100) { s <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=F) s.dist <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=T) ess.s <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs_allele[pgs_allele%in% V(allnet)$name])),colname,array=FALSE,dist=F) ess.s.dist <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs_allele[pgs_allele %in% V(allnet)$name])),colname,array=FALSE,dist=T) rand_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%s]) rand_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%s.dist]) rand_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%s.dist]) pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_allele]) pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_allele]) pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_allele]) rand_ess_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%ess.s]) rand_ess_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%ess.s.dist]) rand_ess_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%ess.s.dist]) } print(i) } rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_degree <- unlist(rand_degree) rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_betweenness <- unlist(rand_betweenness) rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_trans <- unlist(rand_trans) rand_ess_degree_mean <- map(rand_ess_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_ess_degree <- unlist(rand_ess_degree) rand_ess_betweenness_mean <- map(rand_ess_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_ess_betweenness <- unlist(rand_ess_betweenness) rand_ess_trans_mean <- map(rand_ess_trans,function(x) map(x,mean,na.rm=T)) %>% unlist() rand_ess_trans <- unlist(rand_ess_trans) pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_degree <- unlist(pgs_degree) pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_betweenness <- unlist(pgs_betweenness) pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_trans <- unlist(pgs_trans)
d1=plot_data(pgs_degree_mean,rand_degree_mean,rand_ess_degree_mean,pgs_degree,rand_degree,rand_ess_degree,title = 'degree',binwidth=2)[[1]] d2=plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_ess_betweenness_mean,pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.001,xlim=0,title = 'betweenness')[[1]] d3=plot_data(pgs_trans_mean,rand_trans_mean,rand_ess_trans_mean,pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'transitivity')[[1]] plot_grid(d1,d2,d3,ncol=3)
degreefullplot=bind_rows(data.frame(data=allnet.deg[names(allnet.deg)%in%pgs_allele],group='pgs'),data.frame(data=allnet.deg[names(allnet.deg)%in%nones.exp.data$`Allele Gene name`],group='nones'),data.frame(data=allnet.deg[names(allnet.deg)%in%essentials],group='ess')) %>% ggplot(aes(x=data,color=group))+geom_line(aes(y = 1 - ..y..), stat='ecdf')+scale_x_log10()+scale_y_log10()+ annotation_logticks()+ labs(color = "") + scale_color_manual(values = c("#00BFC4", "#FF3300", "orange")) + theme(legend.position = "none", axis.title.y = element_text(size = 30), axis.title.x = element_text(size = 30)) + # scale_fill_discrete(name='')+ ylab("Percentage") + xlab('') + scale_y_continuous(labels = scales::percent) btwfullplot=bind_rows(data.frame(data=allnet.btw[names(allnet.btw)%in%pgs_allele],group='pgs'),data.frame(data=allnet.btw[names(allnet.btw)%in%nones.exp.data$`Allele Gene name`],group='nones'),data.frame(data=allnet.btw[names(allnet.btw)%in%essentials],group='ess')) %>% ggplot(aes(x=data,color=group))+geom_line(aes(y = 1 - ..y..), stat='ecdf')+scale_x_log10()+scale_y_log10()+ annotation_logticks()+ labs(color = "") + scale_color_manual(values = c("#00BFC4", "#FF3300", "orange")) + theme(legend.position = "none", axis.title.y = element_text(size = 30), axis.title.x = element_text(size = 30)) + # scale_fill_discrete(name='')+ ylab("Percentage") + xlab('') + scale_y_continuous(labels = scales::percent) transfullplot=bind_rows(data.frame(data=allnet.trans[names(allnet.trans)%in%pgs_allele],group='pgs'),data.frame(data=allnet.trans[names(allnet.trans)%in%nones.exp.data$`Allele Gene name`],group='nones'),data.frame(data=allnet.trans[names(allnet.trans)%in%essentials],group='ess')) %>% ggplot(aes(x=data,color=group))+geom_line(aes(y = 1 - ..y..), stat='ecdf')+scale_x_log10()+scale_y_log10()+ annotation_logticks()+ labs(color = "") + scale_color_manual(values = c("#00BFC4", "#FF3300", "orange")) + theme(legend.position = "none", axis.title.y = element_text(size = 30), axis.title.x = element_text(size = 30)) + # scale_fill_discrete(name='')+ ylab("Percentage") + xlab('') + scale_y_continuous(labels = scales::percent) plot_grid(degreefullplot,btwfullplot,transfullplot,ncol=3) # d1=plot_data_ecdf(pgs_degree,rand_degree,rand_ess_degree,title = 'Degree',binwidth=2,logscale = T) d2=plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.001,xlim=0,title = 'Betweenness',logscale = F) d3=plot_data_ecdf(pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'Transitivity',logscale = F) plot_grid(d1,d2,d3,ncol=3) pdf('analysis/figures/0816_paper/pcc_data_cdf_cost_78.pdf',width = 7,height = 3) plot_grid(d1,d2,d3,ncol=3) dev.off() png('analysis/figures/0805_paper/pcc_data_cdf_cost_78.png',width = 2000,height = 1200) plot_grid(d1,d2,d3,ncol=3) dev.off()
nw.edgelist <- gather(as.data.frame(adj_res) %>%rownames_to_column(),key='gene2',value='weight',-rowname) %>% filter(rowname!=gene2 & weight>=0.2 &str_detect(rowname,'supp')==F&rowname%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]==F& str_detect(gene2,'supp')==F&gene2%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]==F) %>% drop_na() nw <- graph_from_data_frame(nw.edgelist,directed = F) %>% igraph::simplify() nw.tbl <- nw %>% get.edgelist() %>% as_tibble() nw.tbl$weight <- E(nw)$weight #write_delim(nw.tbl,'analysis/data/derived_data/mypcc_network.tsv',delim='\t',col_names=F) nrow(nw.tbl) #nw.edgelist#data.frame(gene1='a',gene2='b',weight=as.numeric(adj_res[lower.tri(adj_res)])) %>% drop_na() %>% filter(weight!=0) #write_graph(nw.del,'~/Main/pgsNetwork/analysis/data/derived_data/mypcc_network.el',format = 'edgelist')
#net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.2)# & `Double mutant fitness standard deviation`<=0.1) pgs_allele <- strain_ids %>% filter(`Systematic gene name` %in% pgs$orf_name) %>% select(`Allele Gene name`) %>% pull() net=nw.del#graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed = F) pgs.subnetwork(net,pgs_allele) # %>% write_graph('analysis/figures/0805_paper/pcc.graphml',format='graphml')
library(RCy3) net=nw.del#graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed = F) ybr <- neighborhood(net,order=1,nodes='ybr196c-a') ybr_sub <- induced_subgraph(net,ybr[[1]]) createNetworkFromIgraph(ybr_sub,new.title='ybr') layoutNetwork('force-directed defaultSpringLength=70 defaultSpringCoefficient=0.000003')#degree(net,'ybr196c-a') RColorBrewer::brewer.pal(length(E(ybr_sub)$weight %>% unique()),'Paired') setEdgeColorMapping('linkcom',colors=RColorBrewer::brewer.pal(length(E(ybr_sub)$weight %>% unique()),'Paired'),mapping.type = 'd') #sample.nones('ybr196c-a',nones.exp.data %>% add.degree(net,colnames(nones.exp.data)[2]),colnames(nones.exp.data)[2],dist=T) hri <- neighborhood(net,order=1,nodes='sec13-1') sec_sub <- induced_subgraph(net,hri[[1]])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.