library(plyr) library(igraph) library(tidyverse) pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv") gi <- read_delim("../anne/linkcom/intwedgelist", delim = " ", col_names = F) gi_net <- graph_from_edgelist(as.matrix(gi[c(1, 2)]), directed = F) E(gi_net)$linkcom <- gi[, 3] %>% pull() allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds") pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name]) nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") gi_net_neigh <- neighborhood(gi_net, order = 1, nodes = V(gi_net)[V(gi_net)$name %in% pgs$orf_name]) %>% map(function(x) induced_subgraph(gi_net, x)) ess.names <- read_csv("../anne/network_analysis/0530essential/ess_names.csv", col_names = F) # neigh[[1]] # for (i in 1:length(gi_net_neigh)) { # name <- V(gi_net)$name[V(gi_net)$name %in% pgs][i] #n <- gi_net_neigh[V(gi_net)$name[V(gi_net)$name %in% pgs$orf_name] == "YBR196C-A"][[1]] # pdf(paste0("analysis/figures/linkcom_plots_0603/", name, ".pdf")) #plot(n, vertex.size = 2, label.cex = 0.1, edge.color = mapvalues(E(n)$linkcom, E(n)$linkcom %>% unique(), seq_along(E(n)$linkcom %>% unique())), layout = layout.kamada.kawai) # dev.off() # } # incident_edges(gi_net,V(gi_net)[V(gi_net)$name=='ybr196c-a'])[[1]]$linkcom %>% unique() # different_clusters <- map(V(gi_net)$name, function(x) length(unique(incident_edges(gi_net, V(gi_net)[V(gi_net)$name == x])[[1]]$linkcom))) # # numberofedges=map(V(gi_net)$name, function(x) length(incident_edges(gi_net,V(gi_net)[V(gi_net)$name==x])[[1]]$linkcom)) # names(different_clusters) <- V(gi_net)$name # deg.gi <- degree(gi_net) # deg_2 <- deg.gi[deg.gi > 1] # pgs.s <- different_clusters[names(different_clusters) %in% pgs$orf_name[pgs$orf_name %in% names(deg_2)]] %>% unlist() # # notpgs.s <- different_clusters[names(different_clusters) %in% pgs$orf_name == F & names(different_clusters) %in% names(deg_2) & names(different_clusters) %in% nones$`Systematic gene name`] %>% unlist() # # ess.s <- different_clusters[names(different_clusters) %in% ess.names$X1] %>% unlist() # # pgs_s_ratio <- pgs.s / deg_2[names(deg_2) %in% names(pgs.s)] # notpgs_s_ratio <- notpgs.s / deg_2[names(deg_2) %in% names(notpgs.s)] # ess.rat <- ess.s / deg.gi[names(deg.gi) %in% names(ess.s)] # x <- c(pgs.s, notpgs.s, ess.s) # g <- factor(rep(1:3, c(length(pgs.s), length(notpgs.s), length(ess.s))), labels = c("Proto-gene", "Nonessential", "essential")) # pairwise.wilcox.test(x, g) # # boxplot(pgs.s, notpgs.s, ess.s, names = c("Proto-gene", "Nonessential", "Essential"), ylab = "Different edge clusters") # x <- c(pgs_s_ratio, notpgs_s_ratio, ess.rat) # boxplot(pgs_s_ratio, notpgs_s_ratio, ess.rat, names = c("Proto-gene", "Nonessential", "Essential"), ylab = "Different clusters/Degree") # g <- factor(rep(1:3, c(length(pgs_s_ratio), length(notpgs_s_ratio), length(ess.rat))), labels = c("Proto-gene", "Nonessential", "essential")) # kt <- kruskal.test(x, g) # pairwise.wilcox.test(x, g) # boxplot(pgs_s_ratio[pgs_s_ratio<1],notpgs_s_ratio[notpgs_s_ratio<1],ess.rat[ess.rat<1]) # wilcox.test(pgs.s,notpgs.s)
library(plyr) pcc <- read_delim("../anne/linkcom/pccwedgelist.el", delim = " ", col_names = F) pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F) E(pcc_net)$linkcom <- pcc[, 3] %>% pull() allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds") pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name]) nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") pcc_net_neigh <- neighborhood(pcc_net, order = 1, nodes = V(pcc_net)[V(pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(pcc_net, x)) ess <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) # neigh[[1]] # for (i in 1:length(pcc_net_neigh)) { # name <- V(pcc_net)$name[V(pcc_net)$name %in% pgs.alleles][i] n <- pcc_net_neigh[[16]] # pdf(paste0("analysis/figures/linkcom_plots_0603/", name, ".pdf")) plot(n, vertex.size = 2, label.cex = 0.1, edge.color = mapvalues(E(n)$linkcom, E(n)$linkcom %>% unique(), seq_along(E(n)$linkcom %>% unique())), layout = layout.kamada.kawai) pcc_net_neigh <- neighborhood(pcc_net, order = 2, nodes = V(pcc_net)[V(pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(pcc_net, x)) n <- pcc_net_neigh[[16]] # pdf(paste0("analysis/figures/linkcom_plots_0603/", name, ".pdf")) plot(n, vertex.size = 2, label.cex = 0.1, edge.color = mapvalues(E(n)$linkcom, E(n)$linkcom %>% unique(), seq_along(E(n)$linkcom %>% unique())), layout = layout.kamada.kawai) # dev.off() # } # incident_edges(pcc_net,V(pcc_net)[V(pcc_net)$name=='ybr196c-a'])[[1]]$linkcom %>% unique() different_clusters <- map(V(pcc_net)$name, function(x) length(unique(incident_edges(pcc_net, V(pcc_net)[V(pcc_net)$name == x])[[1]]$linkcom))) # numberofedges=map(V(pcc_net)$name, function(x) length(incident_edges(pcc_net,V(pcc_net)[V(pcc_net)$name==x])[[1]]$linkcom)) names(different_clusters) <- V(pcc_net)$name deg.pcc <- degree(pcc_net) deg_2 <- deg.pcc[deg.pcc > 1] pgs.s <- different_clusters[names(different_clusters) %in% pgs.alleles[pgs.alleles %in% names(deg_2)]] %>% unlist() # notpgs.s <- different_clusters[names(different_clusters) %in% pgs.alleles == F & names(different_clusters) %in% nones$`Allele Gene name` & names(different_clusters) %in% names(deg_2)] %>% unlist() # ess.s <- different_clusters[names(different_clusters) %in% ess$X1] %>% unlist() pgs_s_ratio <- pgs.s / deg.pcc[names(deg.pcc) %in% names(pgs.s)] notpgs_s_ratio <- notpgs.s / deg.pcc[names(deg.pcc) %in% names(notpgs.s)] ess.rat <- ess.s / deg.pcc[names(deg.pcc) %in% names(ess.s)] x <- c(pgs.s, notpgs.s, ess.s) g <- factor(rep(1:3, c(length(pgs.s), length(notpgs.s), length(ess.s))), labels = c("Proto-gene", "Nonessential", "essential")) pairwise.wilcox.test(x, g) boxplot(pgs.s, notpgs.s, ess.s, names = c("Proto-gene", "Nonessential", "Essential"), ylab = "Different edge clusters") x <- c(pgs_s_ratio, notpgs_s_ratio, ess.rat) boxplot(pgs_s_ratio, notpgs_s_ratio, ess.rat, names = c("Proto-gene", "Nonessential", "Essential"), ylab = "Different clusters/Degree") g <- factor(rep(1:3, c(length(pgs_s_ratio), length(notpgs_s_ratio), length(ess.rat))), labels = c("Proto-gene", "Nonessential", "essential")) kt <- kruskal.test(x, g) pairwise.wilcox.test(x, g) # boxplot(pgs_s_ratio[pgs_s_ratio<1],notpgs_s_ratio[notpgs_s_ratio<1],ess.rat[ess.rat<1]) # wilcox.test(pgs.s,notpgs.s)
allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds") #fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", itr, ".el") #pcc <- read_delim(fname, delim = " ", col_names = F) #pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F) nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") #%>% add.degree(pcc_net, colnames(.)[2]) E(pcc_net)$linkcom <- pcc[, 3] %>% pull() allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds") pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name]) nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") pcc_net_neigh <- neighborhood(pcc_net, order = 1, nodes = V(pcc_net)[V(pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(pcc_net, x)) ess <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) # different_clusters <- map(V(pcc_net)$name, function(x) length(unique(incident_edges(pcc_net, V(pcc_net)[V(pcc_net)$name == x])[[1]]$linkcom))) # # numberofedges=map(V(pcc_net)$name, function(x) length(incident_edges(pcc_net,V(pcc_net)[V(pcc_net)$name==x])[[1]]$linkcom)) # names(different_clusters) <- V(pcc_net)$name # # deg.pcc <- degree(pcc_net) # # # deg_2 <- deg.pcc[deg.pcc>1] # rand.s <- c() # ess.rand.s <- c() # pgs.s <- c() # # pgs.s <- different_clusters[names(different_clusters)%in%pgs.alleles] %>% unlist()#[pgs.alleles%in%names(deg_2)] # for (i in 1:100) { # pgs.s <- append(pgs.s, mean(different_clusters[names(different_clusters) %in% pgs.alleles] %>% unlist())) # [pgs.alleles%in%names(deg_2)] # # s <- sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name], nones.exp.data, colname = colnames(nones.exp.data)[2], dist = T, T, T) # rand.s <- append(rand.s, mean(different_clusters[names(different_clusters) %in% s] %>% unlist())) # [s%in%names(deg_2)] # ess.rand.s <- append(ess.rand.s, mean(different_clusters[names(different_clusters) %in% (ess$X1 %>% sample(length(pgs.s)))] %>% unlist())) # }
source('R/dist.sample.R') source('R/add.degree.R') source('R/sample.nones.R') set.seed(1) # allele.frame <- readRDS('analysis/data/derived_data/df_different_alleles.rds') nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")# %>% add.degree(pcc_net, colnames(.)[2]) exp.number.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv") strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") # allele.frame <- readRDS('analysis/data/derived_data/df_different_alleles.rds') essentials <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) %>% dplyr::select(X1) %>% pull() #deg.pcc <- degree(pcc_net) linkcom_df <- data.frame() rand_linkcom_df <- data.frame() colname=colnames(nones.exp.data)[2] pgs.alleles <- strain_ids %>% filter(`Systematic gene name` %in% pgs$orf_name) %>% select(`Allele Gene name`) %>% pull() for (itr in seq(0.1, 0.9, 0.1)) { #print(itr) #fname <- paste0("../anne/linkcom/mypcc/network.tsv_", itr, ".el") fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", itr, ".el") pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F)) largeclusters <- pcc %>% group_by(X3) %>% dplyr::summarise(n = dplyr::n()) %>% filter(n > 2) %>% dplyr::select(X3) pcc_clfiltered <- pcc #%>% filter(X3 %in% largeclusters$X3) pcc_net <- graph_from_edgelist(as.matrix(pcc_clfiltered[c(1, 2)]), directed = F) E(pcc_net)$linkcom <- pcc_clfiltered[, 3] %>% pull() nums <- pcc %>% group_by(X3) %>% dplyr::summarise(n = dplyr::n()) #pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name]) linkcom_df <- bind_rows(linkcom_df, data.frame( i = itr, numberofclusters = nrow(nums), cl1 = sum(nums$n == 1), cl2 = sum(nums$n == 2), cl3 = sum(nums$n == 3), cl4 = sum(nums$n == 4), cl5 = sum(nums$n > 4) )) # pcc_net_neigh <- neighborhood(pcc_net, order = 1,nodes = V(pcc_net)[V(pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(pcc_net, x)) nones.exp.data <- nones.exp.data %>% add.degree(pcc_net,colnames(.)[2]) exp.number.data.filtered <- exp.number.data %>% add.degree(pcc_net, colname) %>% mutate(int_density = deg / num) different_clusters <- map(V(pcc_net)$name, function(x) length(unique(incident_edges(pcc_net, V(pcc_net)[V(pcc_net)$name == x])[[1]]$linkcom))) # numberofedges=map(V(pcc_net)$name, function(x) length(incident_edges(pcc_net,V(pcc_net)[V(pcc_net)$name==x])[[1]]$linkcom)) names(different_clusters) <- V(pcc_net)$name # deg_2 <- deg.pcc[deg.pcc>1] rand.s <- c() ess.rand.s <- c() pgs.s <- c() # pgs.s <- different_clusters[names(different_clusters)%in%pgs.alleles] %>% unlist()#[pgs.alleles%in%names(deg_2)] for (i in 1:100) { pgs.s <- append(pgs.s, mean(different_clusters[names(different_clusters) %in% pgs.alleles] %>% unlist())) # [pgs.alleles%in%names(deg_2)] s <- sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name], nones.exp.data %>% filter(`Allele Gene name` %in% V(pcc_net)$name), colname , dist = T, T, T) rand.s <- append(rand.s, mean(different_clusters[names(different_clusters) %in% s] %>% unlist())) # [s%in%names(deg_2)] # ess.rand.s <- append(ess.rand.s, mean(different_clusters[names(different_clusters) %in% (ess$X1[ess$X1 %in% V(pcc_net)$name] %>% sample(length(pgs.alleles[pgs.alleles %in% V(pcc_net)$name])))] %>% unlist())) #hst <- deg.pcc[names(deg.pcc)%in%pgs.alleles] %>% hist(plot=F,breaks=-1:max(.)) ess.s <- sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(pcc_net)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs.alleles[pgs.alleles %in% V(pcc_net)$name])),colname,array=FALSE,dist=T) ess.rand.s <- append(ess.rand.s,different_clusters[names(different_clusters)%in%ess.s] %>% unlist() %>% mean(na.rm=T)) } smdf <- bind_rows(data.frame(group = "proto-gene", data = pgs.s, i = itr), data.frame(group = "nonessential", data = rand.s, i = itr), data.frame(group = "essential", data = ess.rand.s, i = itr) ) #print(smdf) rand_linkcom_df <- bind_rows( rand_linkcom_df,smdf ) } rand_linkcom_df$i <- as.factor(rand_linkcom_df$i) # plotBoxSign(rand_linkcom_df,0.9) # rand_linkcom_df %>% filter(i==0.3) #saveRDS(rand_linkcom_df,'../anne/linkcom/rand_linkcom_withdegree_df.rds') # saveRDS(linkcom_df,'../anne/linkcom/linkcom_df.rds')
# allele.frame <- readRDS('analysis/data/derived_data/df_different_alleles.rds') nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") # allele.frame <- readRDS('analysis/data/derived_data/df_different_alleles.rds') ess <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) deg.pcc <- degree(pcc_net) linkcom_df <- data.frame() rand_linkcom_df <- data.frame() for (itr in seq(0.1, 0.9, 0.1)) { #print(itr) for(rand_itr in seq(1,20)){ fname <- paste0("../anne/linkcom/linkcom_res_rand/",rand_itr,"_", itr, ".el") pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F)) rand_pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F) E(rand_pcc_net)$linkcom <- pcc[, 3] %>% pull() nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") %>% add.degree(rand_pcc_net, colnames(.)[2]) } nums <- pcc %>% group_by(X3) %>% dplyr::summarise(n = dplyr::n()) #pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name]) linkcom_df <- bind_rows(linkcom_df, data.frame( i = itr, numberofclusters = nrow(nums), cl1 = sum(nums$n == 1), cl2 = sum(nums$n == 2), cl3 = sum(nums$n == 3), cl4 = sum(nums$n == 4), cl5 = sum(nums$n > 4) )) # rand_pcc_net_neigh <- neighborhood(rand_pcc_net, order = 1,nodes = V(rand_pcc_net)[V(rand_pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(rand_pcc_net, x)) different_clusters <- map(V(rand_pcc_net)$name, function(x) length(unique(incident_edges(rand_pcc_net, V(rand_pcc_net)[V(rand_pcc_net)$name == x])[[1]]$linkcom))) # numberofedges=map(V(rand_pcc_net)$name, function(x) length(incident_edges(rand_pcc_net,V(rand_pcc_net)[V(rand_pcc_net)$name==x])[[1]]$linkcom)) names(different_clusters) <- V(rand_pcc_net)$name # deg_2 <- deg.pcc[deg.pcc>1] rand.s <- c() ess.rand.s <- c() pgs.s <- c() # pgs.s <- different_clusters[names(different_clusters)%in%pgs.alleles] %>% unlist()#[pgs.alleles%in%names(deg_2)] for (i in 1:100) { pgs.s <- append(pgs.s, mean(different_clusters[names(different_clusters) %in% pgs.alleles] %>% unlist())) # [pgs.alleles%in%names(deg_2)] s <- sample.nones(pgs.alleles[pgs.alleles %in% V(rand_pcc_net)$name], nones.exp.data %>% filter(`Allele Gene name` %in% V(rand_pcc_net)$name), colname = colnames(nones.exp.data)[2], dist = F, T, T) rand.s <- append(rand.s, mean(different_clusters[names(different_clusters) %in% s] %>% unlist())) # [s%in%names(deg_2)] ess.rand.s <- append(ess.rand.s, mean(different_clusters[names(different_clusters) %in% (ess$X1[ess$X1 %in% V(rand_pcc_net)$name] %>% sample(length(pgs.alleles[pgs.alleles %in% V(rand_pcc_net)$name])))] %>% unlist())) } smdf <- bind_rows(data.frame(group = "proto-gene", data = pgs.s, i = itr), data.frame(group = "nonessential", data = rand.s, i = itr), data.frame(group = "essential", data = ess.rand.s, i = itr) ) #print(smdf) rand_linkcom_df <- bind_rows( rand_linkcom_df,smdf ) } rand_linkcom_df$i <- as.factor(rand_linkcom_df$i) # plotBoxSign(rand_linkcom_df,0.9) # rand_linkcom_df %>% filter(i==0.3) # saveRDS(rand_linkcom_df,'../anne/linkcom/rand_linkcom_df.rds') # saveRDS(linkcom_df,'../anne/linkcom/linkcom_df.rds')
library(igraph) library(tidyverse) #linkcom_df=readRDS('../anne/linkcom/linkcom_df.rds') library(DT) datatable(linkcom_df,colnames = c('Parameter','Total #of clusters','Clusters with 1 edge','Clusters with 2 edge', 'Clusters with 3 edge','Clusters with 4 edge','Clusters with 5 or more edge')) %>% formatStyle(names(linkcom_df),color='black') #getwd()
library(ggsignif) # saveRDS(cl_mean_df_list, "../anne/mcl/cl_mean_df_list.rds") # saveRDS(clusterdf, "../anne/mcl/clusterdf.rds") #rand_linkcom_df <- readRDS('../anne/linkcom/rand_linkcom_withdegree_df.rds') plotBoxSign <- function(data, itr,signf=F) { data$group <- factor(data$group, levels = c("proto-gene", "nonessential", "essential")) data <- data %>% filter(i == itr) p12 <- 2 * sum(data$data[data$group == "proto-gene"] < data$data[data$group == "nonessential"]) / length(data$data[data$group == "proto-gene"]) p13 <- 2 * sum(data$data[data$group == "proto-gene"] < data$data[data$group == "essential"]) / length(data$data[data$group == "proto-gene"]) p23 <- 2 * sum(data$data[data$group == "nonessential"] < data$data[data$group == "essential"]) / length(data$data[data$group == "proto-gene"]) if(signf){ ggplot(data, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels = c("Proto-gene","Non-essential Genes","Essential Genes")) +theme_bw()+ ylab("Mean") + xlab("") + theme(axis.text.x = element_text(size=20),axis.title.y = element_text(size = 25),legend.position = 'none') + scale_fill_manual(name = "",values = c("#00BFC4", "#FF3300", "orange")) +xlab('')+ylab('Mean number of different clusters')+ geom_signif( annotations = c(formatC(p12, digits = 3), formatC(p13, digits = 3), formatC(p23, digits = 3)), y_position = c(max(data$data) + 1, max(data$data) + 2, max(data$data) + 3), xmin = c(1, 1, 2), xmax = c(2, 3, 3))+ggtitle(itr) }else{ ggplot(data, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels =c("Proto-gene","Non-essential Genes","Essential Genes")) +theme_bw()+ ylab("Mean") + xlab("") + theme(axis.title.y = element_text(size = 25)) + scale_fill_manual(name = "", labels = element_blank(),values = c("#00BFC4", "#FF3300", "orange")) + xlab('')+ylab('Mean number of different clusters')+ ggtitle(itr) } } # map(seq(12,30,2),function(x) plotBoxSign(rand_linkcom_df,x)) # for(i in seq(12,30,2)){ # plotBoxSign(rand_linkcom_df, i) # }
plotBoxSign(rand_linkcom_df, 0.1)
plotBoxSign(rand_linkcom_df, 0.2,T)
plotBoxSign(rand_linkcom_df, 0.3,T)
plotBoxSign(rand_linkcom_df, 0.4,T)
plotBoxSign(rand_linkcom_df, 0.5,F) +theme(plot.title = element_text(hjust = 0.5,size=14),legend.position = 'none',axis.text.x = element_text(size=11),axis.title.y = element_text(size=14)) +ylab('Mean number of \ndifferent clusters connected')+ggtitle('Link community clustering')+scale_x_discrete(labels =c("Proto-gene","Non-essential\nGenes","Essential\nGenes")) ggsave('analysis/figures/0812_poster/linkcom.pdf',height = 4,width = 4,unit='in')
plotBoxSign(rand_linkcom_df, 0.6,T)
plotBoxSign(rand_linkcom_df, 0.7,T)
plotBoxSign(rand_linkcom_df, 0.8,T) # ggplot(rand_linkcom_df %>% fi, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels = element_blank()) + # ylab("Mean") + xlab("") + theme(axis.title.y = element_text(size = 25)) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange"))
plotBoxSign(rand_linkcom_df, 0.9,T) # ggplot(rand_linkcom_df %>% filter(i==0.9), aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels = element_blank()) + # ylab("Mean") + xlab("") + theme(axis.title.y = element_text(size = 25)) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange"))
library(org.Sc.sgd.db) OrgDb <- org.Sc.sgd.db sgd <- AnnotationDbi::select(OrgDb, keys=keys(OrgDb), keytype='ORF',columns = 'SGD') f2 <- function(x) { str_split(x, "-") %>% unlist() %>% magrittr::extract(1) # fs$X1 %>% map(str_split,'-') %>% unlist %>% magrittr::extract(1) } f <- function(x) { ch <- paste0("cluster", x, " ") allelenames <- fs$X1[fs$X3 == x & is.na(fs$X1) == F] genenames <- sgd$SGD[sgd$ORF%in%strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` %in% allelenames]] ch <- paste0(ch, paste0(as.character(genenames), collapse = " ")) ch <- paste0(ch, " batch") } strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") # %>% mutate(protname=) for (itr in seq(0.1, 0.9, 0.1)) { fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", itr, ".el") pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F)) largeclusters <- pcc %>% group_by(X3) %>% dplyr::summarise(n = dplyr::n()) %>% filter(n > 4) %>% dplyr::select(X3) pcc_clfiltered <- pcc %>% filter(X3 %in% largeclusters$X3) pcc_fl <- pcc_clfiltered[, c(2, 1, 3)] colnames(pcc_fl) <- colnames(pcc_clfiltered) fs <- bind_rows(pcc_clfiltered, pcc_fl) # %>% select(X1,X3) #%>% unique() fs <- fs[, c(1, 3)] %>% unique() map(fs$X3, f)[[1]]# %>% paste0(collapse = "") %>% writeLines(paste0('../networkGo/linkcom/',itr,'.txt')) } # fs$X1 <- map_chr(fs$X1,f2) #[[1]] #%>% map(as.character) %>% append('batch') #%>% writeLines('d.txt') # strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name`%in%fs$X1[fs$X3==11707]] fs$X3 %>% unique() %>% length()
library(org.Sc.sgd.db) OrgDb <- org.Sc.sgd.db sgd <- AnnotationDbi::select(OrgDb, keys=keys(OrgDb), keytype='ORF',columns = 'SGD') write_separate <- function(x,itr) { ch <- c() allelenames <- fs$X1[fs$X3 == x & is.na(fs$X1) == F] genenames <- sgd$SGD[sgd$ORF%in%strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` %in% allelenames]] ch <- paste0(ch, paste0(as.character(genenames), collapse = " ")) writeLines(ch,paste0('../networkGo/goatools/input/',itr,'/',x,'.txt')) } strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") # %>% mutate(protname=) for (itr in seq(0.1, 0.9, 0.1)) { dir.create(paste0('../networkGo/goatools/input/',itr)) fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", itr, ".el") pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F)) largeclusters <- pcc %>% group_by(X3) %>% dplyr::summarise(n = dplyr::n()) %>% filter(n > 4) %>% dplyr::select(X3) pcc_clfiltered <- pcc %>% filter(X3 %in% largeclusters$X3) pcc_fl <- pcc_clfiltered[, c(2, 1, 3)] colnames(pcc_fl) <- colnames(pcc_clfiltered) fs <- bind_rows(pcc_clfiltered, pcc_fl) # %>% select(X1,X3) #%>% unique() fs <- fs[, c(1, 3)] %>% unique() map(fs$X3, write_separate,itr)# %>% paste0(collapse = "") %>% writeLines(paste0('../networkGo/linkcom/',itr,'.txt')) } path <- '../networkGo/goatools/input/' for(itr in list.files(path)){ for(j in list.files(paste0(path,itr))){ input=paste0(path,itr,'/',j) system(paste0("python find_enrichment.py ",input," ../sgd_costanzogenes sgd.gaf --pval=0.05 --method=fdr_bh --pval_field=p_uncorrected --outfile=",input, ".out --obo ../go.obo --ns BP --ev_exc ND")) } } ###python find_enrichment.py deneme ../sgd_costanzogenes sgd.gaf --pval=0.05 --method=fdr_bh --pval_field=p_uncorrected --outfile=out_bon.xlsx --obo ../go.obo
fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", 0.5, ".el") pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F)) #pcc <- read_delim("../anne/linkcom/pccwedgelist.el", delim = " ", col_names = F) pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F) E(pcc_net)$linkcom <- pcc$X3 pcc %>% filter(X1=='dgr1'|X1=='ybr196c-a') #pcc_clfiltered_nodes <- c(pcc_clfiltered$X1, pcc_clfiltered$X2) %>% unique() # pcc_net_filtered <- graph_from_edgelist(as.matrix(pcc_clfiltered[c(1,2)]),directed = F) #pcc_net_filtered <- induced_subgraph(pcc_net, V(pcc_net)[V(pcc_net)$name %in% pcc_clfiltered_nodes]) #V(pcc_net)$name[V(pcc_net)$name %in% pgs.alleles] %>% map(function(x) neighborhood(pcc_net, nodes = x)) get.edgelist(ybr_sub) ybr_sub <- induced_subgraph(pcc_net, vids = neighborhood(pcc_net, order = 1, nodes = "pex4") %>% unlist()) plot(ybr_sub, vertex.size=2,edge.color = E(ybr_sub)$linkcom, layout = layout.kamada.kawai) E(ybr_sub)$linkcom %>% unique() # %>% E() #%>% magrittr::extract2('linkcom')#$linkcom write_graph(ybr_sub,format='graphml','~/Downloads/pex_sub.graphml') net <- pcc_net ybr <- neighborhood(net,order=1,nodes='ybr196c-a') ybr_net <- induced_subgraph(net,ybr[[1]]) #%>% #plot(vertex.size=2,edge.color = E(.)$linkcom, layout = layout.kamada.kawai) ybr_net degree(net,'ybr196c-a') exp.number.data %>% add.degree(net,colnames(exp.number.data)[2]) %>% filter(deg==degree(net,'ybr196c-a')) #sample.nones('ybr196c-a',exp.number.data %>% add.degree(net,colnames(exp.number.data)[2]),colnames(exp.number.data)[2],dist=T,array = F) hri <- neighborhood(net,order=1,nodes='sec13-1') hrnet <- induced_subgraph(net,hri[[1]]) #%>% plot(vertex.size=2,edge.color = E(.)$linkcom, layout = layout.kamada.kawai) E(induced_subgraph(net,hri[[1]]))$linkcom %>% unique E(induced_subgraph(net,ybr[[1]]))$linkcom %>% unique library(RCy3) induced_subgraph(net,ybr[[1]]) %>%createNetworkFromIgraph("myIgraph") layoutNetwork('force-directed defaultSpringLength=70 defaultSpringCoefficient=0.000003') #RColorBrewer::brewer.pal(length(ybr[[1]])-1,'Paired') setEdgeColorMapping('linkcom', E(ybr_net)$linkcom %>% unique(),RColorBrewer::brewer.pal(length(ybr[[1]])-1,'Paired'),mapping.type = 'd') setBackgroundColorDefault('#ffffff') setEdgeLineWidthDefault(12) setNodeColorBypass('ybr196c-a','#00CCCC') setNodeSizeDefault(50) setEdgeOpacityDefault(255) #?setNodeOpacity exportImage("analysis/figures/0812_poster/ybr_image_0814", "PNG",resolution=300, height = 4, width = 4,units='inches') induced_subgraph(net,hri[[1]]) %>%createNetworkFromIgraph("hr") layoutNetwork('force-directed defaultSpringLength=100 defaultSpringCoefficient=0.0000003') setEdgeColorMapping('linkcom', E(hrnet)$linkcom %>% unique(),RColorBrewer::brewer.pal(length(hri[[1]])-1,'Paired'),mapping.type = 'd') setNodeColorBypass('sec13-1','#ffa500') exportImage("analysis/figures/0812_poster/sec_image_0814", "PNG",resolution=300, height = 4, width = 4,units='inches') pgs_sub <- pgs.subnetwork(net,pgs_allele) induced_subgraph(pgs_sub,names(components(pgs_sub)$membership[components(pgs_sub)$membership==1]))%>% createNetworkFromIgraph('pcc_pgs') setNodeSizeMapping('cat',c('gene','proto-gene'),sizes=c(30,60),mapping.type = 'd') layoutNetwork('kamada-kawai')# defaultSpringLength=100 defaultSpringCoefficient=0.0000003') setEdgeColorDefault('#000000') setBackgroundColorDefault('#ffffff') setEdgeOpacityDefault(255) fitContent() exportImage("analysis/figures/0812_poster/pgs_sub", "PDF",resolution=300, height = 5, width = 5,units='inches')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.