This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
set.seed(1)
library(tidyverse) library(igraph) library(ggsignif) library(cowplot) source("R/pgs.subnetwork.R") source("R/add.degree.R") source("R/sample.nones.R") source("R/plot_data.R") pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv") #pgs <- read_csv("~/Downloads/omersyntenyfilter.csv") overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv") net.df <- readRDS("analysis/data/derived_data/SGA_data_combined.rds.gz") strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") net_df_significant <- filter(net.df, `P-value` <= 0.05 & `Query Strain ID` %in% overlappingorfs$orf_name == FALSE & `Array Strain ID` %in% overlappingorfs$orf_name == FALSE) net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.2)#& `Double mutant fitness standard deviation`<=0.1) allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds") nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs$orf_name,'proto-gene',maincat)) try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,] lengths <- lapply(try$different.alleles, length) try.multiple <- try[lengths > 1, ] try.single <- try[lengths == 1, ] ess.names <- exp.data$`Systematic gene name`[exp.data$maincat=='essential']
sim_n=10 rand_degree <- vector("list",sim_n) rand_betweenness <- vector("list",sim_n) rand_trans <- vector("list",sim_n) pgs_degree <- vector("list",sim_n) pgs_betweenness <- vector("list",sim_n) pgs_trans <- vector("list",sim_n) colname <- colnames(nones.exp.data)[1] pgs_ <- data.frame(orf_name=sample.nones(pgs$orf_name,nones.exp.data,colname)) for (i in 1:sim_n) { allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1))) net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list) net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)]) allnet <- graph_from_edgelist(net_edgelist, directed = F) allnet <- simplify(allnet) # nones.exp.data <- nones.exp.data %>% # add.degree(allnet, colname) %>% # mutate(int_density = deg / num) # # exp.number.data.filtered <- exp.number.data %>% # add.degree(allnet, colname) %>% # mutate(int_density = deg / num) %>% # filter(Allele.Gene.name %in% allnw.list) allnet.deg <- degree(allnet) allnet.btw <- betweenness(allnet, normalized = T) #allnet.eig <- eigen_centrality(allnet)$vector allnet.trans <- transitivity(allnet,'local') names(allnet.trans) <- names(allnet.deg) #nn <- pgs.subnetwork(allnet, pgs$orf_name) for (t in 1:100) { allnet_re <- rewire(allnet, with = keeping_degseq(niter = 10*vcount(allnet))) allnet.deg.rnd <- degree(allnet_re) allnet.btw.rnd <- betweenness(allnet_re,normalized = T) allnet.trans.rnd <- transitivity(allnet_re,'local') names(allnet.trans.rnd) <- V(allnet_re)$name pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs$orf_name]) pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs$orf_name]) pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs$orf_name]) rand_degree[[i]][t] <- list(allnet.deg.rnd[names(allnet.deg.rnd)%in%pgs$orf_name]) rand_betweenness[[i]][t] <- list(allnet.btw.rnd[names(allnet.btw.rnd)%in%pgs$orf_name]) rand_trans[[i]][t] <- list(allnet.trans.rnd[names(allnet.trans.rnd)%in%pgs$orf_name]) } } rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_degree <- unlist(rand_degree) rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_betweenness <- unlist(rand_betweenness) rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_trans <- unlist(rand_trans) pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_degree <- unlist(pgs_degree) pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_betweenness <- unlist(pgs_betweenness) pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_trans <- unlist(pgs_trans) boxplot(pgs_degree %>% map(mean,na.rm=T) %>% unlist(),map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist()) boxplot(pgs_betweenness %>% map(mean,na.rm=T) %>% unlist(),map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist()) boxplot(pgs_trans %>% map(mean,na.rm=T) %>% unlist(),map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist()) plot_data(pgs_degree_mean,rand_degree_mean,rand_degree_mean,pgs_degree,rand_degree,rand_degree,title="Degree") plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_betweenness_mean,pgs_betweenness,rand_betweenness,rand_betweenness,title="Betweenness",binwidth = 0.0001,xlim = 0) plot_data(pgs_trans_mean,rand_trans_mean,rand_trans_mean,pgs_trans,rand_trans,rand_trans,title="Transitivity",xlim=0,binwidth = 0.01) plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_betweenness,title="Betwenness") plot_data_ecdf(pgs_trans,rand_trans,rand_trans,title = "Transitivity")
sim_n=100 rand_degree <- vector("list",sim_n) rand_betweenness <- vector("list",sim_n) rand_trans <- vector("list",sim_n) pgs_degree <- vector("list",sim_n) pgs_betweenness <- vector("list",sim_n) pgs_trans <- vector("list",sim_n) colname <- colnames(nones.exp.data)[1] for (i in 1:sim_n) { allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1))) net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list) pgs_ <- data.frame(orf_name=sample.nones(pgs$orf_name,nones.exp.data,colname)) net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)]) allnet <- graph_from_edgelist(net_edgelist, directed = F) allnet <- simplify(allnet) # nones.exp.data <- nones.exp.data %>% # add.degree(allnet, colname) %>% # mutate(int_density = deg / num) # # exp.number.data.filtered <- exp.number.data %>% # add.degree(allnet, colname) %>% # mutate(int_density = deg / num) %>% # filter(Allele.Gene.name %in% allnw.list) allnet.deg <- degree(allnet) allnet.btw <- betweenness(allnet, normalized = T) #allnet.eig <- eigen_centrality(allnet)$vector allnet.trans <- transitivity(allnet,'local') names(allnet.trans) <- names(allnet.deg) #nn <- pgs.subnetwork(allnet, pgs$orf_name) for (t in 1:10) { allnet_re <- rewire(allnet, with = keeping_degseq(niter = 10*vcount(allnet))) allnet.deg.rnd <- degree(allnet_re) allnet.btw.rnd <- betweenness(allnet_re,normalized = T) allnet.trans.rnd <- transitivity(allnet_re,'local') names(allnet.trans.rnd) <- V(allnet_re)$name pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_$orf_name]) pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_$orf_name]) pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_$orf_name]) rand_degree[[i]][t] <- list(allnet.deg.rnd[names(allnet.deg.rnd)%in%pgs_$orf_name]) rand_betweenness[[i]][t] <- list(allnet.btw.rnd[names(allnet.btw.rnd)%in%pgs_$orf_name]) rand_trans[[i]][t] <- list(allnet.trans.rnd[names(allnet.trans.rnd)%in%pgs_$orf_name]) } } rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_degree <- unlist(rand_degree) rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_betweenness <- unlist(rand_betweenness) rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_trans <- unlist(rand_trans) pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_degree <- unlist(pgs_degree) pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_betweenness <- unlist(pgs_betweenness) pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_trans <- unlist(pgs_trans) plot_data(pgs_degree_mean,rand_degree_mean,rand_degree_mean,pgs_degree,rand_degree,rand_degree,title="Degree") plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_betweenness_mean,pgs_betweenness,rand_betweenness,rand_betweenness,title="Betweenness",binwidth = 0.0001,xlim = 0) plot_data(pgs_trans_mean,rand_trans_mean,rand_trans_mean,pgs_trans,rand_trans,rand_trans,title="Transitivity",xlim=0,binwidth = 0.01) plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_betweenness,title="Betwenness") plot_data_ecdf(pgs_trans,rand_trans,rand_trans,title = "Transitivity") # checkRand <- function(allnet, allnet_re){ # el_allnet_re=get.edgelist(allnet_re) %>% as.data.frame %>% group_by(V1,V2) %>% arrange(V1,V2) # el_allnet=get.edgelist(allnet) %>% as.data.frame %>% group_by(V1,V2) %>% arrange(V1,V2) # elcheck=paste0(el_allnet$V1,el_allnet$V2)%in%paste0(el_allnet_re$V1,el_allnet_re$V2) # sum(elcheck)/length(elcheck) # # # }
rand_giant_nodes <- c() rand_giant_edges <- c() rand_giant_nodes_es <- c() rand_giant_edges_es <- c() giant_nodes <- c() giant_edges <- c() colname <- colnames(nones.exp.data)[1] for (i in 1:20) { allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1))) net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list) net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)]) pgs_allele <- strain_ids %>% filter(`Systematic gene name` %in% pgs) %>% select(`Allele Gene name`) %>% pull() allnet <- graph_from_edgelist(net_edgelist, directed = F) allnet <- simplify(allnet) nones.exp.data <- nones.exp.data %>% add.degree(allnet, colname) %>% dplyr::mutate(int_density = deg / num) exp.number.data.filtered <- exp.data %>% add.degree(allnet, colname) %>% mutate(int_density = deg / num) %>% filter(`Allele Gene name` %in% allnw.list) allnet.deg <- degree(allnet) allnet.btw <- betweenness(allnet, normalized = T) allnet.eig <- eigen_centrality(allnet)$vector nn <- pgs.subnetwork(allnet, pgs$orf_name) giant_comp_nn <- induced_subgraph(nn, V(nn)[components(nn)$membership == which.max(components(nn)$csize)]) # giant_nodes <- append(giant_nodes,vcount(giant_comp_nn)) # giant_edges <- append(giant_edges,ecount(giant_comp_nn)) for (t in 1:100) { giant_nodes <- append(giant_nodes, vcount(giant_comp_nn)) giant_edges <- append(giant_edges, ecount(giant_comp_nn)) s <- sample.nones(pgs$orf_name[pgs$orf_name %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=T) nn_re <- pgs.subnetwork(allnet, s) giant_comp_nn_re <- induced_subgraph(nn_re, V(nn_re)[components(nn_re)$membership == which.max(components(nn_re)$csize)]) rand_giant_nodes <- append(rand_giant_nodes, vcount(giant_comp_nn_re)) rand_giant_edges <- append(rand_giant_edges, ecount(giant_comp_nn_re)) ess.s <- sample.nones(pgs$orf_name[pgs$orf_name %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%ess.names)|(!!sym(colname)%in%pgs$orf_name[pgs$orf_name %in% V(allnet)$name])),colname,array=FALSE,dist=T) #sample(V(allnet)$name[V(allnet)$name %in% ess.names$X1], length(pgs$orf_name[pgs$orf_name %in% V(allnet)$name])) nn_es <- pgs.subnetwork(allnet, ess.s) giant_comp_nn_es <- induced_subgraph(nn_es, V(nn_es)[components(nn_es)$membership == which.max(components(nn_es)$csize)]) rand_giant_nodes_es <- append(rand_giant_nodes_es, vcount(giant_comp_nn_es)) rand_giant_edges_es <- append(rand_giant_edges_es, ecount(giant_comp_nn_es)) } } # boxplot(giant_nodes,rand_giant_nodes,rand_giant_nodes_es) # boxplot(giant_edges,rand_giant_edges,rand_giant_edges_es) giant_comp_df <- bind_rows(data.frame(data = giant_nodes, group = "proto-gene"), data.frame(data = rand_giant_nodes, group = "nonessential"), data.frame(data = rand_giant_nodes_es, group = "essential")) giant_comp_df <- bind_rows(data.frame(data = giant_edges, group = "proto-gene"), data.frame(data = rand_giant_edges, group = "nonessential"), data.frame(data = rand_giant_edges_es, group = "essential")) giant_comp_df$group <- factor(giant_comp_df$group, levels = c("proto-gene", "nonessential", "essential")) # ggplot(giant_comp_df,aes(x=group,y=data,fill=group))+geom_boxplot() p12 <- 2 * sum(abs(giant_comp_df$data[giant_comp_df$group == "proto-gene"]) - abs(giant_comp_df$data[giant_comp_df$group == "nonessential"]) >= 0) / length(giant_comp_df$data[giant_comp_df$group == "proto-gene"]) p13 <- 2 * sum(abs(giant_comp_df$data[giant_comp_df$group == "proto-gene"]) - abs(giant_comp_df$data[giant_comp_df$group == "essential"]) >= 0) / length(giant_comp_df$data[giant_comp_df$group == "proto-gene"]) p23 <- 2 * sum(abs(giant_comp_df$data[giant_comp_df$group == "nonessential"]) - abs(giant_comp_df$data[giant_comp_df$group == "essential"]) >= 0) / length(giant_comp_df$data[giant_comp_df$group == "proto-gene"]) # function for number of observations give.n <- function(x){ return(c(y = median(x)*1.05, label = length(x))) # experiment with the multiplier to find the perfect position } ggplot(giant_comp_df, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot(outlier.shape = NA) + scale_x_discrete(labels = c("Proto-genes", "Non-essential Genes", "Essential Genes")) + theme_bw() + ylab("Giant component size") + xlab("") + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), legend.position = "none", axis.title.y = element_text(size = 25), axis.text.x = element_text(size = 16) ) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange"))# + ggsave('analysis/figures/0816_paper/giantcompsize_withdegedges.pdf') geom_signif( annotations = c("ns", "ns", "ns"), y_position = c(235, 250, 240), xmin = c(1, 1, 2), xmax = c(2, 3, 3)) #+ggtitle(itr) # plotBoxSign(giant_comp_df)
source('R/dist.sample.R') sim_n=10 essentials <- c(net_df_significant$`Query Strain ID`[net_df_significant$data_source=='ExE'],net_df_significant$`Array Strain ID`[net_df_significant$data_source=='ExE']) %>% unique()#net.df%>%filter(data_source=='ExE') rand_degree <- vector("list",sim_n) rand_betweenness <- vector("list",sim_n) rand_trans <- vector("list",sim_n) rand_ess_degree <- vector("list",sim_n) rand_ess_betweenness <- vector("list",sim_n) rand_ess_trans <- vector("list",sim_n) pgs_degree <- vector("list",sim_n) pgs_betweenness <- vector("list",sim_n) pgs_trans <- vector("list",sim_n) colname <- colnames(nones.exp.data)[1] pgs_ <- pgs#[pgs$orf_name%in%nones.exp.data$`Systematic gene name`[nones.exp.data$cat=='nxes.only'],] for (i in 1:sim_n) { allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1))) net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list) net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)]) pgs_allele <- strain_ids %>% filter(`Systematic gene name` %in% pgs) %>% select(`Allele Gene name`) %>% pull() allnet <- graph_from_edgelist(net_edgelist, directed = F) allnet <- simplify(allnet) nones.exp.data <- nones.exp.data %>% add.degree(allnet, colname) %>% dplyr::mutate(int_density = deg / num) exp.number.data.filtered <- exp.data %>% add.degree(allnet, colname) %>% mutate(int_density = deg / num) %>% filter(`Allele Gene name` %in% allnw.list) allnet.deg <- degree(allnet) allnet.btw <- betweenness(allnet, normalized = T,weights = NA) # allnet.eig <- eigen_centrality(allnet)$vector allnet.trans <- transitivity(allnet,'local') names(allnet.trans) <- names(allnet.deg) # nn<-pgs.subnetwork(allnet, pgs$orf_name) # giant_comp_nn <- induced_subgraph(nn,V(nn)[components(nn)$membership==which.max(components(nn)$csize)]) # giant_nodes <- append(giant_nodes,vcount(giant_comp_nn)) # giant_edges <- append(giant_edges,ecount(giant_comp_nn)) for (t in 1:100) { #cat=='nxes.only'& s <- sample.nones(pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=F,array=T) s.dist <- sample.nones(pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=T) ess.s <- sample.nones(pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%ess.names)|(!!sym(colname)%in%pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name])),colname,array=FALSE,dist=F) ess.s.dist <- sample.nones(pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%ess.names)|(!!sym(colname)%in%pgs_$orf_name[pgs_$orf_name %in% V(allnet)$name])),colname,array=FALSE,dist=T) rand_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%s]) rand_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%s.dist]) rand_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%s.dist]) pgs_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%pgs_$orf_name]) pgs_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%pgs_$orf_name]) pgs_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%pgs_$orf_name]) rand_ess_degree[[i]][t] <- list(allnet.deg[names(allnet.deg)%in%ess.s]) rand_ess_betweenness[[i]][t] <- list(allnet.btw[names(allnet.btw)%in%ess.s.dist]) rand_ess_trans[[i]][t] <- list(allnet.trans[names(allnet.trans)%in%ess.s.dist]) } } rand_degree_mean <- map(rand_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_degree <- unlist(rand_degree) rand_betweenness_mean <- map(rand_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_betweenness <- unlist(rand_betweenness) rand_trans_mean <- map(rand_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_trans <- unlist(rand_trans) rand_ess_degree_mean <- map(rand_ess_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_ess_degree <- unlist(rand_ess_degree) rand_ess_betweenness_mean <- map(rand_ess_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() rand_ess_betweenness <- unlist(rand_ess_betweenness) rand_ess_trans_mean <- map(rand_ess_trans,function(x) map(x,mean,na.rm=T)) %>% unlist() rand_ess_trans <- unlist(rand_ess_trans) pgs_degree_mean <- map(pgs_degree,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_degree <- unlist(pgs_degree) pgs_betweenness_mean <- map(pgs_betweenness,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_betweenness <- unlist(pgs_betweenness) pgs_trans_mean <- map(pgs_trans,function(x) map(x,mean, na.rm=T)) %>% unlist() pgs_trans <- unlist(pgs_trans)
d1=plot_data(pgs_degree_mean,rand_degree_mean,rand_ess_degree_mean,pgs_degree,rand_degree,rand_ess_degree,title = 'degree')[[1]] d2=plot_data(pgs_betweenness_mean,rand_betweenness_mean,rand_ess_betweenness_mean,pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.0001,xlim=0,title = 'betweenness')[[1]] d3=plot_data(pgs_trans_mean,rand_trans_mean,rand_ess_trans_mean,pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'transitivity')[[1]] plot_grid(d1,d2,d3,ncol=3) d1=plot_data_ecdf(pgs_degree,rand_degree,rand_ess_degree,title = 'Degree',binwidth=2,logscale=T) d2=plot_data_ecdf(pgs_betweenness,rand_betweenness,rand_ess_betweenness,binwidth = 0.001,xlim=0,title = 'Betweenness',logscale=F) d3=plot_data_ecdf(pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'Transitivity',logscale=F) #plot_data_ecdf(nones.exp.data$deg[nones.exp.data$`Systematic gene name`%in%pgs$orf_name],nones.exp.data$deg[nones.exp.data$`Systematic gene name`%in%pgs$orf_name==F],nones.exp.data$deg[nones.exp.data$`Systematic gene name`%in%pgs$orf_name==F],title='Interaction density') # pdf('analysis/figures/0816_paper/int_data_cdf_0.2nosd.pdf',width = 7,height = 3) plot_grid(d1,d2,d3,ncol=3) dev.off() png('analysis/figures/0805_paper/int_data_cdf_0.2nosd.png',width = 2000,height = 1200) plot_grid(d1,d2,d3,ncol=3) dev.off()
library(plyr) load("~/Main/anne/network_analysis/0409/intData.Rdata") load("~/Main/anne/network_analysis/0409/pccData.Rdata") load("~/Main/anne/network_analysis/0530essential/int_essential.RData") load("~/Main/anne/network_analysis/0530essential/pcc_essential.RData") # dff.int$group <- revalue(dff.int$group, c("neighborhood"="nodeg", "neighborhood with same distribution"="simulation")) sm.list.int$type <- revalue(sm.list.int$type, c("actual" = "Proto-gene", "simulation" = "Non-essential")) sm.list.pcc$type <- revalue(sm.list.pcc$type, c("actual" = "Proto-gene", "simulation" = "Non-essential")) sm.list.nodeg.pcc.mean <- readRDS("~/Main/anne/network_analysis/0409/sm.list.nodeg.pcc.mean.rds") sm.list.nodeg.pcc <- readRDS("~/Main/anne/network_analysis/0409/sm.list.nodeg.pcc.rds") sm.list.nodeg.int$type <- revalue(sm.list.nodeg.int$type, c("actual" = "Proto-gene", "simulation" = "Non-essential")) sm.list.nodeg.pcc$type <- revalue(sm.list.nodeg.pcc$type, c("actual" = "Proto-gene", "simulation" = "Non-essential")) d1=plot_data(sm.list.int.mean$nn.mean,sm.list.int.mean$nones.mean,int_essentialmeandf$degree,sm.list.int$degree[sm.list.int$group=='proto-gene'&sm.list.int$type=='Proto-gene'],sm.list.int$degree[sm.list.int$group=='proto-gene'&sm.list.int$type=='Non-essential'],int_essentialdf$degree[int_essentialdf$group=='proto-gene'&int_essentialdf$type=='essential'],title = 'degree')[[1]] d2=plot_data(sm.list.int.mean$nn.btw.mean,sm.list.int.mean$nones.btw.mean,int_essentialmeandf$betweenness,sm.list.int$betweenness[sm.list.int$group=='proto-gene'&sm.list.int$type=='Proto-gene'],sm.list.int$betweenness[sm.list.int$group=='proto-gene'&sm.list.int$type=='Non-essential'],int_essentialdf$betweenness[int_essentialdf$group=='proto-gene'&int_essentialdf$type=='essential'],binwidth = 0.0001,xlim=0,title = 'betweenness')[[1]] d3=plot_data(pgs_trans_mean,rand_trans_mean,rand_ess_trans_mean,pgs_trans,rand_trans,rand_ess_trans,binwidth = 0.1,xlim=0,title = 'transitivity')[[1]] plot_grid(d1,d2,d3,ncol=3)
hist(allnet.deg[s],breaks=-1:max(allnet.deg[s])) hist(allnet.deg[names(allnet.deg)%in%pgs$orf_name],breaks=-1:max(allnet.deg[names(allnet.deg)%in%pgs$orf_name])) nones.exp.data[nones.exp.data$`Systematic gene name`%in%(allnet.deg[s][allnet.deg[s]>30] %>% names()),] nones.exp.data %>% filter(`Systematic gene name` %in% pgs$orf_name&(cat=='nq.nxes')&bin>80) nones.exp.data %>% filter(`Systematic gene name` %in% pgs$orf_name&(cat=='na.only')&bin>80) nones.exp.data %>% filter((cat=='nq.nxes')&bin>80°>30) nones.exp.data %>% filter((cat=='na.only')&bin>80°>30) boxplot((nones.exp.data %>% filter((cat=='na.only')&bin>80) %>% select(deg) %>% pull()),(nones.exp.data %>% filter(`Systematic gene name` %in% pgs$orf_name&(cat=='nq.nxes')&bin>80) %>% select(deg) %>% pull())) dd=map(nones.exp.data$cat %>% unique(), function(x) nones.exp.data %>% filter(cat==x) %>% ggplot(aes(num))+geom_histogram(binwidth = 1)+geom_hline(yintercept = 1,color='red')+geom_hline(yintercept = 2,color='red')+ggtitle(x))
net_df_significant <- net_df_significant %>% mutate(group=ifelse(`Query Strain ID`%in%pgs$orf_name|`Array Strain ID`%in%pgs$orf_name, 'Proto-gene', ifelse(`Query Strain ID`%in%essentials|`Array Strain ID`%in%essentials,'Essential','Non-essential'))) ggplot(net_df_significant %>% filter(`Genetic interaction score (ε)`<=-0.08),aes(x=group,y=`Double mutant fitness standard deviation`))+#geom_boxplot() stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1)) ggplot(net_df_significant,aes(x=`Genetic interaction score (ε)`,y=`Double mutant fitness standard deviation`,color=group))+geom_point()+ylim(0,NA)+xlim(NA,-0.08) #ggplot(comb_nw_data, aes(x=type,y=data,color=group,fill=group,group = as.factor(group))) +`Double mutant fitness standard deviation`<0.1
rand_degree <- c() nn_degree <- c() colname <- colnames(nones.exp.data)[1] for (i in 1:10) { allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1))) net.df.randomallele <- filter(net_df_significant_sl, `Query allele name` %in% allnw.list & `Array allele name` %in% allnw.list) net_edgelist <- as.matrix(net.df.randomallele[, c(1, 3)]) pgs_allele <- strain_ids %>% filter(`Systematic gene name` %in% pgs) %>% select(`Allele Gene name`) %>% pull() allnet <- graph_from_edgelist(net_edgelist, directed = F) allnet <- simplify(allnet) nones.exp.data <- nones.exp.data %>% add.degree(allnet, colname) %>% dplyr::mutate(int_density = deg / num) # exp.number.data.filtered <- exp.number.data %>% # add.degree(allnet, colname) %>% # mutate(int_density = deg / num) %>% # filter(Allele.Gene.name %in% allnw.list) allnet.deg <- degree(allnet) allnet.btw <- betweenness(allnet, normalized = T) allnet.eig <- eigen_centrality(allnet)$vector for (t in 1:100) { s <- sample.nones(pgs$orf_name[pgs$orf_name %in% V(allnet)$name], nones.exp.data, colname) nn_degree <- append(nn_degree, mean(allnet.deg[names(allnet.deg) %in% pgs$orf_name])) rand_degree <- append(rand_degree, mean(allnet.deg[names(allnet.deg) %in% s])) } } sum(nn_degree > rand_degree) / length(nn_degree) # boxplot(giant_edges,rand_giant_edges)
pgs_eps <- net_df_significant %>% filter(`Query Strain ID` %in% pgs$orf_name | `Array Strain ID` %in% pgs$orf_name) %>% mutate(data = `Genetic interaction score (ε)`, group = "proto-genes") nones_eps <- net_df_significant %>% filter((`Query Strain ID` %in% pgs$orf_name | `Array Strain ID` %in% pgs$orf_name) == F & (`Query Strain ID` %in% nones.exp.data$`Systematic gene name` | `Array Strain ID` %in% nones.exp.data$`Systematic gene name`)) %>% mutate(data = `Genetic interaction score (ε)`, group = "Nonessential") ess_eps <- net_df_significant %>% filter((`Query Strain ID` %in% pgs$orf_name | `Array Strain ID` %in% pgs$orf_name) == F & (`Query Strain ID` %in% ess.names | `Array Strain ID` %in% ess.names)) %>% mutate(data = `Genetic interaction score (ε)`, group = "Essential") eps_comb <- bind_rows(pgs_eps %>% select(data, group), nones_eps %>% select(data, group), ess_eps %>% select(data, group)) library(ggsignif) # p12 <- pvalue(independence_test(data ~ group, data = (eps_comb %>% filter(group == "proto-genes" | group == "Nonessential")))) # p13 <- pvalue(independence_test(data ~ group, data = (eps_comb %>% filter(group == "proto-genes" | group == "Essential")))) # p23 <- pvalue(independence_test(data ~ group, data = (eps_comb %>% filter(group == "Essential" | group == "Nonessential")))) ggplot(eps_comb %>% filter(data <= -0.2), aes(x = data, color = group, y = ..ndensity..)) + geom_freqpoly(binwidth = 0.05) + geom_vline(xintercept = -0.2, linetype = "dashed") + xlim(NA, -0.2) #+geom_vline(xintercept = 0.08,linetype="dashed") ggplot(eps_comb %>% filter(data <= -0), aes(x = data, color = group, y = ..ndensity..)) + geom_freqpoly(binwidth = 0.05) + geom_vline(xintercept = -0.2, linetype = "dashed") + xlim(NA, -0) #+geom_vline(xintercept = 0.08,linetype="dashed") +scale_x_discrete(labels = c("Proto-genes", "Non-essential Genes", "Essential Genes")) + theme_bw() + ylab("Shortest distance") + xlab("") + theme( panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), legend.position = "none", axis.title.y = element_text(size = 25), axis.text.x = element_text(size = 16) ) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange")) # + # geom_signif( # annotations = c(paste0("p=", formatC(p12, digits = 3)), paste0("p=", formatC(p13, digits = 3)), paste0("p=", formatC(p23, digits = 3))), # y_position = c(max(eps_comb$data) + 1, max(eps_comb$data) + 3, max(eps_comb$data) + 2), xmin = c(1, 1, 2), xmax = c(2, 3, 3) # )
net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.2)# & `Double mutant fitness standard deviation`<=0.1) net=graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed = F) pgs.subnetwork(net,pgs$orf_name) %>% write_graph('analysis/figures/0708pres/int_.2_nosd.graphml',format='graphml') #plot(net %>% induced_subgraph(neighborhood(.,order = 1,nodes = 'yer175w-a')[[1]])) #neighborhood(net,order = 1,nodes = 'YER175W-A')
library(RCy3) net=graph_from_data_frame(net_df_significant_sl[,c(1,3)],directed = F) %>% simplify() pgs_sub <- pgs.subnetwork(net,pgs$orf_name) induced_subgraph(pgs_sub,names(components(pgs_sub)$membership[components(pgs_sub)$membership==1]))%>% createNetworkFromIgraph('int_pgs') setNodeSizeMapping('cat',c('gene','proto-gene'),sizes=c(30,60),mapping.type = 'd') layoutNetwork('kamada-kawai')# defaultSpringLength=100 defaultSpringCoefficient=0.0000003') setEdgeColorDefault('#000000') setBackgroundColorDefault('#ffffff') setEdgeOpacityDefault(50) fitContent() exportImage("analysis/figures/0812_poster/pgs_sub_int", "PDF",resolution=300, height = 5, width = 5,units='inches')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.