This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(igraph) library(tidyverse) source("R/sample.nones.R") source("R/add.degree.R") source("R/dist.sample.R") set.seed(1) #pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv") allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds") # pcc <- read_delim("~/Main/pgsNetwork/analysis/data/derived_data/mypcc_network.tsv", delim = "\t", col_names = F) # pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F) pcc <- read_delim("../anne/linkcom/pccwedgelist.el", delim = " ", col_names = F) pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F) nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") %>% add.degree(pcc_net, colnames(.)[2]) pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name]) essentials <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) %>% dplyr::select(X1) %>% pull() clusterdata <- readLines("../anne/mcl/out.data1.mci.I14.labels.out") # induced.subgraph(pcc_net,mclres[str_detect(mclres,'ybr196c-a')] %>% str_split('\t') %>% unlist) %>% plot() # mclres[str_detect(mclres,'ybr196c-a')] %>% str_split('\t') %>% unlist %>% write # name <- 'ybr196c-a' neighborclusters <- function(pcc_net, name, clusterdata) { neigh <- neighbors(pcc_net, name)$name map(neigh, function(x) which(str_detect(clusterdata, x))) %>% unlist() %>% unique() } # # pgs_cluster_means <- c() # rand_cluster_means <- c() # rand_ess_cl_means <- c() # for (i in 1:1000) { # pgs_cluster_means <- append(pgs_cluster_means, pgs.alleles[pgs.alleles %in% V(pcc_net)$name] %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) # # rand_cluster_means <- append(rand_cluster_means, sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name], nones.exp.data %>% filter(`Allele Gene name` %in% V(pcc_net)$name), colnames(nones.exp.data)[2], dist = F) %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) # rand_ess_cl_means <- append(rand_ess_cl_means, ess$X1[ess$X1 %in% V(pcc_net)$name] %>% sample(length(pgs.alleles[pgs.alleles %in% V(pcc_net)$name])) %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) # } # # # # boxplot(pgs_cluster_means, rand_cluster_means, rand_ess_cl_means) # sum(pgs_cluster_means < rand_cluster_means)
neighborclusters <- function(pcc_net, name, clusterdata) { neigh <- neighbors(pcc_net, name)$name V(pcc_net)[neigh]$memb %>% unique() # map(neigh, function(x) which(str_detect(clusterdata, x))) %>% # unlist() %>% # unique() } nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") exp.number.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv") colname <- colnames(nones.exp.data)[2] cl_mean_df_list <- data.frame() clusterdf <- data.frame() # i=seq(14, 30, 2),numberofclusters=NA,cl1=NA,cl2=NA,cl3=NA,cl4=NA,cl5=NA) vv=as.character(seq(1.2,5.0,0.2)) vv[c(5,10,15,20)] <- c("2.0","3.0","4.0","5.0") #for (itr in vv) { for(itr in seq(12,26,2)){ #clusterdata <- readLines(paste0("~/Main/anne/mcl/out.data1.mci.I12.labels.out", as.character(itr))) clusterdata <- readLines(paste0("~/Main/anne/mcl/out.data1.mci.I",itr,".labels.out")) ll <- map(clusterdata, function(x) length(str_split(x, " ") %>% unlist())) clusterdf <- data.frame( i = itr, numberofclusters = length(clusterdata), cl1 = sum(ll == 1), cl2 = sum(ll == 2), cl3 = sum(ll == 3), cl4 = sum(ll == 4), cl5 = sum(ll > 4) ) %>% bind_rows(clusterdf) #clusterdata <- clusterdata[ll>2] cldata <- map(clusterdata,function(x) unlist(str_split(x,'\t'))) V(pcc_net)$memb <- V(pcc_net)$name %>% map(function(x) map(1:length(cldata), function(i) any(cldata[[i]]==x)) %>% unlist() %>% which() ) %>% unlist() pgs_cluster_means <- c() rand_cluster_means <- c() rand_ess_cl_means <- c() deg.pcc <- degree(pcc_net) nones.exp.data <- nones.exp.data %>% add.degree(pcc_net, colname) %>% mutate(int_density = deg / num) exp.number.data.filtered <- exp.number.data %>% add.degree(pcc_net, colname) %>% mutate(int_density = deg / num) for (i in 1:100) { pgs_cluster_means <- append(pgs_cluster_means, pgs.alleles[pgs.alleles %in% V(pcc_net)$name] %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) rand_cluster_means <- append(rand_cluster_means, sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name], nones.exp.data %>% filter(`Allele Gene name` %in% V(pcc_net)$name), colnames(nones.exp.data)[2], dist = T) %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) #hst <- deg.pcc[names(deg.pcc)%in%pgs.alleles] %>% hist(plot=F,breaks=-1:max(.)) ess.s <- ess.s <- sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(pcc_net)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs.alleles[pgs.alleles %in% V(pcc_net)$name])),colname,array=FALSE,dist=T) rand_ess_cl_means <- append(rand_ess_cl_means, ess.s %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) } cl_mean_df <- bind_rows( data.frame(group = "proto-gene", data = pgs_cluster_means, i = itr), data.frame(group = "nonessential", data = rand_cluster_means, i = itr), data.frame(group = "essential", data = rand_ess_cl_means, i = itr) ) cl_mean_df_list <- bind_rows(cl_mean_df_list, cl_mean_df) # boxplot(pgs_cluster_means, rand_cluster_means, rand_ess_cl_means) # title(itr) # sum(pgs_cluster_means < rand_cluster_means) } #saveRDS(cl_mean_df_list,'1000sim.rds') print('finished')
library(igraph) library(tidyverse) #clusterdf=readRDS("../anne/mcl/clusterdf.rds") library(DT) #colnames(clusterdf) datatable(clusterdf,colnames = c('Parameter','Total #of clusters','Clusters with 1 node','Clusters with 2 node', 'Clusters with 3 node','Clusters with 4 node','Clusters with 5 or more node')) %>% formatStyle(names(clusterdf),color='black') #formatStyle(names(.), color = 'red', backgroundColor = 'orange', fontWeight = 'bold') #getwd()
library(ggsignif) #saveRDS(cl_mean_df_list, "../anne/mcl/cl_mean_with_deg_df_list.rds") # saveRDS(clusterdf, "../anne/mcl/clusterdf.rds") #cl_mean_df_list <- readRDS('../anne/mcl/cl_mean_df_list.rds') #cl_mean_df_list <- readRDS('../anne/mcl/cl_mean_with_deg_df_list.rds') plotBoxSign <- function(data, itr, signf = F) { data$group <- factor(data$group, levels = c("proto-gene", "nonessential", "essential")) data <- data %>% filter(i == itr) p12 <- 2 * sum(data$data[data$group == "proto-gene"] < data$data[data$group == "nonessential"]) / length(data$data[data$group == "proto-gene"]) p13 <- 2 * sum(data$data[data$group == "proto-gene"] < data$data[data$group == "essential"]) / length(data$data[data$group == "proto-gene"]) p23 <- 2 * sum(data$data[data$group == "nonessential"] < data$data[data$group == "essential"]) / length(data$data[data$group == "proto-gene"]) if (signf) { ggplot(data, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels = c("Proto-gene", "Non-essential Genes", "Essential Genes")) + theme_bw() + ylab("Mean") + xlab("") + theme_me_bar + scale_fill_manual(name = "", values = c("#1CBDC2", "#EF3D23", "#FAA51A")) + xlab("") + ylab("Mean number of different clusters") + geom_signif( annotations = c(formatC(p12, digits = 3), formatC(p13, digits = 3), formatC(p23, digits = 3)), y_position = c(max(data$data) + 1, max(data$data) + 2, max(data$data) + 3), xmin = c(1, 1, 2), xmax = c(2, 3, 3) ) + ggtitle(itr) } else { ggplot(data, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels = c("Proto-gene", "Non-essential Genes", "Essential Genes")) + theme_bw() + ylab("Mean") + xlab("") + theme_me_bar + scale_fill_manual(name = "", labels = element_blank(), values = c("#1CBDC2", "#EF3D23", "#FAA51A")) + xlab("") + ylab("Mean number of different clusters") + ggtitle(itr) } } #map(vv,function(x) plotBoxSign(cl_mean_df_list,x)) for(i in vv){ plotBoxSign(cl_mean_df_list, i) }
plotBoxSign(cl_mean_df_list, 12,T)
plotBoxSign(cl_mean_df_list, 14,T)
plotBoxSign(cl_mean_df_list, 16,T)
plotBoxSign(cl_mean_df_list, 18,T)
plotBoxSign(cl_mean_df_list, 20,T)
plotBoxSign(cl_mean_df_list, 22,T)
library(ggsignif) plotBoxSign(cl_mean_df_list, 26,F)+ylab('Mean number of \ndifferent clusters connected')+ggtitle('')+scale_x_discrete(labels =c("Proto-gene","Non-essential\nGenes","Essential\nGenes"))+theme(legend.position = 'none') ggsave('analysis/figures/0821_paper/mcl.pdf',height = 4,width = 4,unit='in') # plotBoxSign(cl_mean_df_list, 24,T) # ggsave('analysis/figures/0805_paper/mcl.pdf',width = 12,height=12)
plotBoxSign(cl_mean_df_list, 26,T)
plotBoxSign(cl_mean_df_list, 2.8,T) plotBoxSign(cl_mean_df_list, "3.0",T) plotBoxSign(cl_mean_df_list, 3.2,T) plotBoxSign(cl_mean_df_list, 3.4,T) plotBoxSign(cl_mean_df_list, 3.6,T) plotBoxSign(cl_mean_df_list, 3.8,T) plotBoxSign(cl_mean_df_list, "4.0",T) plotBoxSign(cl_mean_df_list, 4.2,T) plotBoxSign(cl_mean_df_list, 4.4,T) plotBoxSign(cl_mean_df_list, 4.6,T) plotBoxSign(cl_mean_df_list, 4.8,T) plotBoxSign(cl_mean_df_list, "5.0",T)
pcc <- read_delim("../anne/linkcom/pccwedgelist.el", delim = " ", col_names = F) pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F) cl_mean_df_list <- data.frame() clusterdf <- data.frame() # i=seq(14, 30, 2),numberofclusters=NA,cl1=NA,cl2=NA,cl3=NA,cl4=NA,cl5=NA) for (itr in 14) { itr=20 clusterdata <- readLines(paste0("../anne/mcl/out.data1.mci.I", itr, ".labels.out")) cldata <- map(clusterdata,function(x) unlist(str_split(x,'\t'))) V(pcc_net)$memb <- V(pcc_net)$name %>% map(function(x) map(1:length(cldata), function(i) any(cldata[[i]]==x)) %>% unlist() %>% which() ) %>% unlist() #sapply(1:length(myList), function(i) any(myList[[i]] == myValue) #V(pcc_net)$name %>% map(function(x) which(str_detect(clusterdata,x))) %>% unlist() %>% length ybr_sub <- induced_subgraph(pcc_net, vids = neighborhood(pcc_net, order = 1, nodes = "ybr196c-a") %>% unlist()) pex_sub <- induced_subgraph(pcc_net, vids = neighborhood(pcc_net, order = 1, nodes = "pex4") %>% unlist()) plot(ybr_sub, vertex.size=8,vertex.color=V(ybr_sub)$memb,edge.color = E(ybr_sub)$linkcom, layout = layout.kamada.kawai) plot(pex_sub, vertex.size=8,vertex.color=V(pex_sub)$memb,edge.color = E(pex_sub)$linkcom, layout = layout.kamada.kawai) E(ybr_sub)$linkcom %>% unique() # %>% E() #%>% magrittr::extract2('linkcom')#$linkcom #write_graph(ybr_sub,format='graphml','~/Downloads/pex_sub.graphml') pgs_cluster_means <- c() rand_cluster_means <- c() rand_ess_cl_means <- c() for (i in 1:100) { pgs_cluster_means <- append(pgs_cluster_means, pgs.alleles[pgs.alleles %in% V(pcc_net)$name] %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) rand_cluster_means <- append(rand_cluster_means, sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name], nones.exp.data %>% filter(`Allele Gene name` %in% V(pcc_net)$name), colnames(nones.exp.data)[2], dist = T) %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) hst <- deg.pcc[names(deg.pcc)%in%pgs.alleles] %>% hist(plot=F,breaks=-1:max(.)) ess.s <- dist.sample(pcc_net,hst$counts,V(pcc_net)$name[V(pcc_net)$name%in%ess$X1]) rand_ess_cl_means <- append(rand_ess_cl_means, ess.s %>% map(function(x) length(neighborclusters(pcc_net, x, clusterdata))) %>% unlist() %>% mean()) } cl_mean_df <- bind_rows( data.frame(group = "proto-gene", data = pgs_cluster_means, i = itr), data.frame(group = "nonessential", data = rand_cluster_means, i = itr), data.frame(group = "essential", data = rand_ess_cl_means, i = itr) ) cl_mean_df_list <- bind_rows(cl_mean_df_list, cl_mean_df) # boxplot(pgs_cluster_means, rand_cluster_means, rand_ess_cl_means) # title(itr) # sum(pgs_cluster_means < rand_cluster_means) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.