library(plyr)
library(igraph)
library(tidyverse)
pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv")

gi <- read_delim("../anne/linkcom/intwedgelist", delim = " ", col_names = F)
gi_net <- graph_from_edgelist(as.matrix(gi[c(1, 2)]), directed = F)
E(gi_net)$linkcom <- gi[, 3] %>% pull()
allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name])
nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
gi_net_neigh <- neighborhood(gi_net, order = 1, nodes = V(gi_net)[V(gi_net)$name %in% pgs$orf_name]) %>% map(function(x) induced_subgraph(gi_net, x))
ess.names <- read_csv("../anne/network_analysis/0530essential/ess_names.csv", col_names = F)
# neigh[[1]]
# for (i in 1:length(gi_net_neigh)) {
#   name <- V(gi_net)$name[V(gi_net)$name %in% pgs][i]
#n <- gi_net_neigh[V(gi_net)$name[V(gi_net)$name %in% pgs$orf_name] == "YBR196C-A"][[1]]
#   pdf(paste0("analysis/figures/linkcom_plots_0603/", name, ".pdf"))
#plot(n, vertex.size = 2, label.cex = 0.1, edge.color = mapvalues(E(n)$linkcom, E(n)$linkcom %>% unique(), seq_along(E(n)$linkcom %>% unique())), layout = layout.kamada.kawai)
#   dev.off()
# }
# incident_edges(gi_net,V(gi_net)[V(gi_net)$name=='ybr196c-a'])[[1]]$linkcom %>% unique()

# different_clusters <- map(V(gi_net)$name, function(x) length(unique(incident_edges(gi_net, V(gi_net)[V(gi_net)$name == x])[[1]]$linkcom)))
# # numberofedges=map(V(gi_net)$name, function(x) length(incident_edges(gi_net,V(gi_net)[V(gi_net)$name==x])[[1]]$linkcom))
# names(different_clusters) <- V(gi_net)$name
# deg.gi <- degree(gi_net)
# deg_2 <- deg.gi[deg.gi > 1]
# pgs.s <- different_clusters[names(different_clusters) %in% pgs$orf_name[pgs$orf_name %in% names(deg_2)]] %>% unlist() #
# notpgs.s <- different_clusters[names(different_clusters) %in% pgs$orf_name == F & names(different_clusters) %in% names(deg_2) & names(different_clusters) %in% nones$`Systematic gene name`] %>% unlist() #
# ess.s <- different_clusters[names(different_clusters) %in% ess.names$X1] %>% unlist()
# 
# pgs_s_ratio <- pgs.s / deg_2[names(deg_2) %in% names(pgs.s)]
# notpgs_s_ratio <- notpgs.s / deg_2[names(deg_2) %in% names(notpgs.s)]
# ess.rat <- ess.s / deg.gi[names(deg.gi) %in% names(ess.s)]
# x <- c(pgs.s, notpgs.s, ess.s)
# g <- factor(rep(1:3, c(length(pgs.s), length(notpgs.s), length(ess.s))), labels = c("Proto-gene", "Nonessential", "essential"))
# pairwise.wilcox.test(x, g)
# 
# boxplot(pgs.s, notpgs.s, ess.s, names = c("Proto-gene", "Nonessential", "Essential"), ylab = "Different edge clusters")
# x <- c(pgs_s_ratio, notpgs_s_ratio, ess.rat)
# boxplot(pgs_s_ratio, notpgs_s_ratio, ess.rat, names = c("Proto-gene", "Nonessential", "Essential"), ylab = "Different clusters/Degree")
# g <- factor(rep(1:3, c(length(pgs_s_ratio), length(notpgs_s_ratio), length(ess.rat))), labels = c("Proto-gene", "Nonessential", "essential"))
# kt <- kruskal.test(x, g)
# pairwise.wilcox.test(x, g)
# boxplot(pgs_s_ratio[pgs_s_ratio<1],notpgs_s_ratio[notpgs_s_ratio<1],ess.rat[ess.rat<1])
# wilcox.test(pgs.s,notpgs.s)
library(plyr)
pcc <- read_delim("../anne/linkcom/pccwedgelist.el", delim = " ", col_names = F)
pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F)
E(pcc_net)$linkcom <- pcc[, 3] %>% pull()
allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name])
nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
pcc_net_neigh <- neighborhood(pcc_net, order = 1, nodes = V(pcc_net)[V(pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(pcc_net, x))
ess <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F)
# neigh[[1]]
# for (i in 1:length(pcc_net_neigh)) {
#   name <- V(pcc_net)$name[V(pcc_net)$name %in% pgs.alleles][i]
n <- pcc_net_neigh[[16]]
#   pdf(paste0("analysis/figures/linkcom_plots_0603/", name, ".pdf"))
plot(n, vertex.size = 2, label.cex = 0.1, edge.color = mapvalues(E(n)$linkcom, E(n)$linkcom %>% unique(), seq_along(E(n)$linkcom %>% unique())), layout = layout.kamada.kawai)
pcc_net_neigh <- neighborhood(pcc_net, order = 2, nodes = V(pcc_net)[V(pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(pcc_net, x))
n <- pcc_net_neigh[[16]]
#   pdf(paste0("analysis/figures/linkcom_plots_0603/", name, ".pdf"))
plot(n, vertex.size = 2, label.cex = 0.1, edge.color = mapvalues(E(n)$linkcom, E(n)$linkcom %>% unique(), seq_along(E(n)$linkcom %>% unique())), layout = layout.kamada.kawai)
#   dev.off()
# }
# incident_edges(pcc_net,V(pcc_net)[V(pcc_net)$name=='ybr196c-a'])[[1]]$linkcom %>% unique()

different_clusters <- map(V(pcc_net)$name, function(x) length(unique(incident_edges(pcc_net, V(pcc_net)[V(pcc_net)$name == x])[[1]]$linkcom)))
# numberofedges=map(V(pcc_net)$name, function(x) length(incident_edges(pcc_net,V(pcc_net)[V(pcc_net)$name==x])[[1]]$linkcom))
names(different_clusters) <- V(pcc_net)$name
deg.pcc <- degree(pcc_net)
deg_2 <- deg.pcc[deg.pcc > 1]
pgs.s <- different_clusters[names(different_clusters) %in% pgs.alleles[pgs.alleles %in% names(deg_2)]] %>% unlist() #
notpgs.s <- different_clusters[names(different_clusters) %in% pgs.alleles == F & names(different_clusters) %in% nones$`Allele Gene name` & names(different_clusters) %in% names(deg_2)] %>% unlist() #
ess.s <- different_clusters[names(different_clusters) %in% ess$X1] %>% unlist()

pgs_s_ratio <- pgs.s / deg.pcc[names(deg.pcc) %in% names(pgs.s)]
notpgs_s_ratio <- notpgs.s / deg.pcc[names(deg.pcc) %in% names(notpgs.s)]
ess.rat <- ess.s / deg.pcc[names(deg.pcc) %in% names(ess.s)]
x <- c(pgs.s, notpgs.s, ess.s)
g <- factor(rep(1:3, c(length(pgs.s), length(notpgs.s), length(ess.s))), labels = c("Proto-gene", "Nonessential", "essential"))
pairwise.wilcox.test(x, g)

boxplot(pgs.s, notpgs.s, ess.s, names = c("Proto-gene", "Nonessential", "Essential"), ylab = "Different edge clusters")
x <- c(pgs_s_ratio, notpgs_s_ratio, ess.rat)
boxplot(pgs_s_ratio, notpgs_s_ratio, ess.rat, names = c("Proto-gene", "Nonessential", "Essential"), ylab = "Different clusters/Degree")
g <- factor(rep(1:3, c(length(pgs_s_ratio), length(notpgs_s_ratio), length(ess.rat))), labels = c("Proto-gene", "Nonessential", "essential"))
kt <- kruskal.test(x, g)
pairwise.wilcox.test(x, g)
# boxplot(pgs_s_ratio[pgs_s_ratio<1],notpgs_s_ratio[notpgs_s_ratio<1],ess.rat[ess.rat<1])
# wilcox.test(pgs.s,notpgs.s)
allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
#fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", itr, ".el")
#pcc <- read_delim(fname, delim = " ", col_names = F)
#pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F)
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") #%>% add.degree(pcc_net, colnames(.)[2])

E(pcc_net)$linkcom <- pcc[, 3] %>% pull()
allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name])
nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
pcc_net_neigh <- neighborhood(pcc_net, order = 1, nodes = V(pcc_net)[V(pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(pcc_net, x))
ess <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F)

# different_clusters <- map(V(pcc_net)$name, function(x) length(unique(incident_edges(pcc_net, V(pcc_net)[V(pcc_net)$name == x])[[1]]$linkcom)))
# # numberofedges=map(V(pcc_net)$name, function(x) length(incident_edges(pcc_net,V(pcc_net)[V(pcc_net)$name==x])[[1]]$linkcom))
# names(different_clusters) <- V(pcc_net)$name
# 
# deg.pcc <- degree(pcc_net)
# 
# # deg_2 <- deg.pcc[deg.pcc>1]
# rand.s <- c()
# ess.rand.s <- c()
# pgs.s <- c()
# # pgs.s <- different_clusters[names(different_clusters)%in%pgs.alleles] %>% unlist()#[pgs.alleles%in%names(deg_2)]
# for (i in 1:100) {
#   pgs.s <- append(pgs.s, mean(different_clusters[names(different_clusters) %in% pgs.alleles] %>% unlist())) # [pgs.alleles%in%names(deg_2)]
# 
#   s <- sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name], nones.exp.data, colname = colnames(nones.exp.data)[2], dist = T, T, T)
#   rand.s <- append(rand.s, mean(different_clusters[names(different_clusters) %in% s] %>% unlist())) # [s%in%names(deg_2)]
#   ess.rand.s <- append(ess.rand.s, mean(different_clusters[names(different_clusters) %in% (ess$X1 %>% sample(length(pgs.s)))] %>% unlist()))
# }
source('R/dist.sample.R')
source('R/add.degree.R')
source('R/sample.nones.R')
set.seed(1)
# allele.frame <- readRDS('analysis/data/derived_data/df_different_alleles.rds')
nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")# %>% add.degree(pcc_net, colnames(.)[2])
exp.number.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
# allele.frame <- readRDS('analysis/data/derived_data/df_different_alleles.rds')
essentials <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) %>% dplyr::select(X1) %>% pull()
#deg.pcc <- degree(pcc_net)
linkcom_df <- data.frame()
rand_linkcom_df <- data.frame()
colname=colnames(nones.exp.data)[2]
pgs.alleles <- strain_ids %>%
    filter(`Systematic gene name` %in% pgs$orf_name) %>%
    select(`Allele Gene name`) %>%
    pull()
for (itr in seq(0.1, 0.9, 0.1)) {
  #print(itr)
  #fname <- paste0("../anne/linkcom/mypcc/network.tsv_", itr, ".el")
  fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", itr, ".el")

  pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F))
  largeclusters <- pcc %>%
  group_by(X3) %>%
  dplyr::summarise(n = dplyr::n()) %>%
  filter(n > 2) %>%
  dplyr::select(X3)

  pcc_clfiltered <- pcc #%>% filter(X3 %in% largeclusters$X3)
  pcc_net <- graph_from_edgelist(as.matrix(pcc_clfiltered[c(1, 2)]), directed = F)
  E(pcc_net)$linkcom <- pcc_clfiltered[, 3] %>% pull()
  nums <- pcc %>%
    group_by(X3) %>%
    dplyr::summarise(n = dplyr::n())
  #pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name])
  linkcom_df <- bind_rows(linkcom_df, data.frame(
    i = itr,
    numberofclusters = nrow(nums),
    cl1 = sum(nums$n == 1),
    cl2 = sum(nums$n == 2),
    cl3 = sum(nums$n == 3),
    cl4 = sum(nums$n == 4),
    cl5 = sum(nums$n > 4)
  ))
  # pcc_net_neigh <- neighborhood(pcc_net, order = 1,nodes = V(pcc_net)[V(pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(pcc_net, x))
nones.exp.data <- nones.exp.data %>% add.degree(pcc_net,colnames(.)[2])
  exp.number.data.filtered <- exp.number.data %>%
      add.degree(pcc_net, colname) %>%
      mutate(int_density = deg / num) 
  different_clusters <- map(V(pcc_net)$name, function(x) length(unique(incident_edges(pcc_net, V(pcc_net)[V(pcc_net)$name == x])[[1]]$linkcom)))
  # numberofedges=map(V(pcc_net)$name, function(x) length(incident_edges(pcc_net,V(pcc_net)[V(pcc_net)$name==x])[[1]]$linkcom))
  names(different_clusters) <- V(pcc_net)$name



  # deg_2 <- deg.pcc[deg.pcc>1]
  rand.s <- c()
  ess.rand.s <- c()
  pgs.s <- c()
  # pgs.s <- different_clusters[names(different_clusters)%in%pgs.alleles] %>% unlist()#[pgs.alleles%in%names(deg_2)]
  for (i in 1:100) {
    pgs.s <- append(pgs.s, mean(different_clusters[names(different_clusters) %in% pgs.alleles] %>% unlist())) # [pgs.alleles%in%names(deg_2)]

    s <- sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name], nones.exp.data %>% filter(`Allele Gene name` %in% V(pcc_net)$name), colname , dist = T, T, T)
    rand.s <- append(rand.s, mean(different_clusters[names(different_clusters) %in% s] %>% unlist())) # [s%in%names(deg_2)]
    # ess.rand.s <- append(ess.rand.s, mean(different_clusters[names(different_clusters) %in% (ess$X1[ess$X1 %in% V(pcc_net)$name] %>% sample(length(pgs.alleles[pgs.alleles %in% V(pcc_net)$name])))] %>% unlist()))
    #hst <- deg.pcc[names(deg.pcc)%in%pgs.alleles] %>% hist(plot=F,breaks=-1:max(.))
    ess.s <- sample.nones(pgs.alleles[pgs.alleles %in% V(pcc_net)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(pcc_net)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs.alleles[pgs.alleles %in% V(pcc_net)$name])),colname,array=FALSE,dist=T)

    ess.rand.s <- append(ess.rand.s,different_clusters[names(different_clusters)%in%ess.s] %>% unlist() %>% mean(na.rm=T))
  }
  smdf <- bind_rows(data.frame(group = "proto-gene", data = pgs.s, i = itr),
    data.frame(group = "nonessential", data = rand.s, i = itr),
    data.frame(group = "essential", data = ess.rand.s, i = itr)
  )
  #print(smdf)
  rand_linkcom_df <- bind_rows(
    rand_linkcom_df,smdf
    )
}

rand_linkcom_df$i <- as.factor(rand_linkcom_df$i)
# plotBoxSign(rand_linkcom_df,0.9)
# rand_linkcom_df %>% filter(i==0.3)

#saveRDS(rand_linkcom_df,'../anne/linkcom/rand_linkcom_withdegree_df.rds')
# saveRDS(linkcom_df,'../anne/linkcom/linkcom_df.rds')
# allele.frame <- readRDS('analysis/data/derived_data/df_different_alleles.rds')
nones <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
# allele.frame <- readRDS('analysis/data/derived_data/df_different_alleles.rds')
ess <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F)
deg.pcc <- degree(pcc_net)
linkcom_df <- data.frame()
rand_linkcom_df <- data.frame()
for (itr in seq(0.1, 0.9, 0.1)) {
  #print(itr)
  for(rand_itr in seq(1,20)){
    fname <- paste0("../anne/linkcom/linkcom_res_rand/",rand_itr,"_", itr, ".el")
    pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F))
    rand_pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F)
    E(rand_pcc_net)$linkcom <- pcc[, 3] %>% pull()
    nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") %>% add.degree(rand_pcc_net, colnames(.)[2])

  }

  nums <- pcc %>%
    group_by(X3) %>%
    dplyr::summarise(n = dplyr::n())
  #pgs.alleles <- unlist(allele.frame$different.alleles[allele.frame$orf_name %in% pgs$orf_name])
  linkcom_df <- bind_rows(linkcom_df, data.frame(
    i = itr,
    numberofclusters = nrow(nums),
    cl1 = sum(nums$n == 1),
    cl2 = sum(nums$n == 2),
    cl3 = sum(nums$n == 3),
    cl4 = sum(nums$n == 4),
    cl5 = sum(nums$n > 4)
  ))
  # rand_pcc_net_neigh <- neighborhood(rand_pcc_net, order = 1,nodes = V(rand_pcc_net)[V(rand_pcc_net)$name %in% pgs.alleles]) %>% map(function(x) induced_subgraph(rand_pcc_net, x))


  different_clusters <- map(V(rand_pcc_net)$name, function(x) length(unique(incident_edges(rand_pcc_net, V(rand_pcc_net)[V(rand_pcc_net)$name == x])[[1]]$linkcom)))
  # numberofedges=map(V(rand_pcc_net)$name, function(x) length(incident_edges(rand_pcc_net,V(rand_pcc_net)[V(rand_pcc_net)$name==x])[[1]]$linkcom))
  names(different_clusters) <- V(rand_pcc_net)$name



  # deg_2 <- deg.pcc[deg.pcc>1]
  rand.s <- c()
  ess.rand.s <- c()
  pgs.s <- c()
  # pgs.s <- different_clusters[names(different_clusters)%in%pgs.alleles] %>% unlist()#[pgs.alleles%in%names(deg_2)]
  for (i in 1:100) {
    pgs.s <- append(pgs.s, mean(different_clusters[names(different_clusters) %in% pgs.alleles] %>% unlist())) # [pgs.alleles%in%names(deg_2)]

    s <- sample.nones(pgs.alleles[pgs.alleles %in% V(rand_pcc_net)$name], nones.exp.data %>% filter(`Allele Gene name` %in% V(rand_pcc_net)$name), colname = colnames(nones.exp.data)[2], dist = F, T, T)
    rand.s <- append(rand.s, mean(different_clusters[names(different_clusters) %in% s] %>% unlist())) # [s%in%names(deg_2)]
    ess.rand.s <- append(ess.rand.s, mean(different_clusters[names(different_clusters) %in% (ess$X1[ess$X1 %in% V(rand_pcc_net)$name] %>% sample(length(pgs.alleles[pgs.alleles %in% V(rand_pcc_net)$name])))] %>% unlist()))
  }
  smdf <- bind_rows(data.frame(group = "proto-gene", data = pgs.s, i = itr),
    data.frame(group = "nonessential", data = rand.s, i = itr),
    data.frame(group = "essential", data = ess.rand.s, i = itr)
  )
  #print(smdf)
  rand_linkcom_df <- bind_rows(
    rand_linkcom_df,smdf
    )
}

rand_linkcom_df$i <- as.factor(rand_linkcom_df$i)
# plotBoxSign(rand_linkcom_df,0.9)
# rand_linkcom_df %>% filter(i==0.3)

# saveRDS(rand_linkcom_df,'../anne/linkcom/rand_linkcom_df.rds')
# saveRDS(linkcom_df,'../anne/linkcom/linkcom_df.rds')
library(igraph)
library(tidyverse)
#linkcom_df=readRDS('../anne/linkcom/linkcom_df.rds')
library(DT)
datatable(linkcom_df,colnames = c('Parameter','Total #of clusters','Clusters with 1 edge','Clusters with 2 edge',
                                 'Clusters with 3 edge','Clusters with 4 edge','Clusters with 5 or more edge')) %>% formatStyle(names(linkcom_df),color='black')
#getwd()
library(ggsignif)
# saveRDS(cl_mean_df_list, "../anne/mcl/cl_mean_df_list.rds")
# saveRDS(clusterdf, "../anne/mcl/clusterdf.rds")
#rand_linkcom_df <- readRDS('../anne/linkcom/rand_linkcom_withdegree_df.rds')

plotBoxSign <- function(data, itr,signf=F) {
  data$group <- factor(data$group, levels = c("proto-gene", "nonessential", "essential"))
  data <- data %>% filter(i == itr)
  p12 <- 2 * sum(data$data[data$group == "proto-gene"] < data$data[data$group == "nonessential"]) / length(data$data[data$group == "proto-gene"])
  p13 <- 2 * sum(data$data[data$group == "proto-gene"] < data$data[data$group == "essential"]) / length(data$data[data$group == "proto-gene"])
  p23 <- 2 * sum(data$data[data$group == "nonessential"] < data$data[data$group == "essential"]) / length(data$data[data$group == "proto-gene"])
  if(signf){
      ggplot(data, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels = c("Proto-gene","Non-essential Genes","Essential Genes")) +theme_bw()+
    ylab("Mean") + xlab("") + theme(axis.text.x = element_text(size=20),axis.title.y = element_text(size = 25),legend.position = 'none') + scale_fill_manual(name = "",values = c("#00BFC4", "#FF3300", "orange")) +xlab('')+ylab('Mean number of different clusters')+
    geom_signif(
      annotations = c(formatC(p12, digits = 3), formatC(p13, digits = 3), formatC(p23, digits = 3)),
      y_position = c(max(data$data) + 1, max(data$data) + 2, max(data$data) + 3), xmin = c(1, 1, 2), xmax = c(2, 3, 3))+ggtitle(itr)
  }else{
      ggplot(data, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels =c("Proto-gene","Non-essential Genes","Essential Genes")) +theme_bw()+
    ylab("Mean") + xlab("") + theme(axis.title.y = element_text(size = 25)) + scale_fill_manual(name = "", labels =  element_blank(),values = c("#00BFC4", "#FF3300", "orange")) + xlab('')+ylab('Mean number of different clusters')+
    ggtitle(itr)
  }
}

# map(seq(12,30,2),function(x) plotBoxSign(rand_linkcom_df,x))
# for(i in seq(12,30,2)){
#   plotBoxSign(rand_linkcom_df, i)  
# }
plotBoxSign(rand_linkcom_df, 0.1)  
plotBoxSign(rand_linkcom_df, 0.2,T)  
plotBoxSign(rand_linkcom_df, 0.3,T)  
plotBoxSign(rand_linkcom_df, 0.4,T)  
plotBoxSign(rand_linkcom_df, 0.5,F) +theme(plot.title = element_text(hjust = 0.5,size=14),legend.position = 'none',axis.text.x = element_text(size=11),axis.title.y = element_text(size=14)) +ylab('Mean number of \ndifferent clusters connected')+ggtitle('Link community clustering')+scale_x_discrete(labels =c("Proto-gene","Non-essential\nGenes","Essential\nGenes"))
ggsave('analysis/figures/0812_poster/linkcom.pdf',height = 4,width = 4,unit='in')
plotBoxSign(rand_linkcom_df, 0.6,T)  
plotBoxSign(rand_linkcom_df, 0.7,T)  
plotBoxSign(rand_linkcom_df, 0.8,T)  
# ggplot(rand_linkcom_df %>% fi, aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels = element_blank()) +
#     ylab("Mean") + xlab("") + theme(axis.title.y = element_text(size = 25)) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange"))
plotBoxSign(rand_linkcom_df, 0.9,T)  
# ggplot(rand_linkcom_df %>% filter(i==0.9), aes(x = group, y = data, group = group, fill = group)) + geom_boxplot() + scale_x_discrete(labels = element_blank()) +
#     ylab("Mean") + xlab("") + theme(axis.title.y = element_text(size = 25)) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange"))
library(org.Sc.sgd.db)
OrgDb <- org.Sc.sgd.db
sgd <- AnnotationDbi::select(OrgDb, keys=keys(OrgDb), keytype='ORF',columns = 'SGD')


f2 <- function(x) {
  str_split(x, "-") %>%
    unlist() %>%
    magrittr::extract(1)
  # fs$X1 %>% map(str_split,'-') %>% unlist %>% magrittr::extract(1)
}
f <- function(x) {
  ch <- paste0("cluster", x, "
")
  allelenames <- fs$X1[fs$X3 == x & is.na(fs$X1) == F]
  genenames <- sgd$SGD[sgd$ORF%in%strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` %in% allelenames]]
  ch <- paste0(ch, paste0(as.character(genenames), collapse = "
"))
  ch <- paste0(ch, "
batch")
}

strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") # %>% mutate(protname=)
for (itr in seq(0.1, 0.9, 0.1)) {

  fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", itr, ".el")
  pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F))
  largeclusters <- pcc %>%
  group_by(X3) %>%
  dplyr::summarise(n = dplyr::n()) %>%
  filter(n > 4) %>%
  dplyr::select(X3)

  pcc_clfiltered <- pcc %>% filter(X3 %in% largeclusters$X3)
  pcc_fl <- pcc_clfiltered[, c(2, 1, 3)]
  colnames(pcc_fl) <- colnames(pcc_clfiltered)
  fs <- bind_rows(pcc_clfiltered, pcc_fl) # %>% select(X1,X3) #%>% unique()
  fs <- fs[, c(1, 3)] %>% unique()

  map(fs$X3, f)[[1]]# %>% paste0(collapse = "") %>% writeLines(paste0('../networkGo/linkcom/',itr,'.txt'))
}

# fs$X1 <- map_chr(fs$X1,f2)
#[[1]] #%>% map(as.character) %>% append('batch') #%>% writeLines('d.txt')

# strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name`%in%fs$X1[fs$X3==11707]]
fs$X3 %>%
  unique() %>%
  length()
library(org.Sc.sgd.db)
OrgDb <- org.Sc.sgd.db
sgd <- AnnotationDbi::select(OrgDb, keys=keys(OrgDb), keytype='ORF',columns = 'SGD')


write_separate <- function(x,itr) {
  ch <- c()
  allelenames <- fs$X1[fs$X3 == x & is.na(fs$X1) == F]
  genenames <- sgd$SGD[sgd$ORF%in%strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` %in% allelenames]]
  ch <- paste0(ch, paste0(as.character(genenames), collapse = "
                          "))
  writeLines(ch,paste0('../networkGo/goatools/input/',itr,'/',x,'.txt'))
}

strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") # %>% mutate(protname=)
for (itr in seq(0.1, 0.9, 0.1)) {
  dir.create(paste0('../networkGo/goatools/input/',itr))
  fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", itr, ".el")
  pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F))
  largeclusters <- pcc %>%
  group_by(X3) %>%
  dplyr::summarise(n = dplyr::n()) %>%
  filter(n > 4) %>%
  dplyr::select(X3)

  pcc_clfiltered <- pcc %>% filter(X3 %in% largeclusters$X3)
  pcc_fl <- pcc_clfiltered[, c(2, 1, 3)]
  colnames(pcc_fl) <- colnames(pcc_clfiltered)
  fs <- bind_rows(pcc_clfiltered, pcc_fl) # %>% select(X1,X3) #%>% unique()
  fs <- fs[, c(1, 3)] %>% unique()

  map(fs$X3, write_separate,itr)# %>% paste0(collapse = "") %>% writeLines(paste0('../networkGo/linkcom/',itr,'.txt'))
}


path <- '../networkGo/goatools/input/'
for(itr in list.files(path)){
  for(j in list.files(paste0(path,itr))){
    input=paste0(path,itr,'/',j)
    system(paste0("python find_enrichment.py ",input," ../sgd_costanzogenes sgd.gaf --pval=0.05 --method=fdr_bh --pval_field=p_uncorrected --outfile=",input, ".out --obo ../go.obo --ns BP --ev_exc ND"))
  }

}
###python find_enrichment.py deneme ../sgd_costanzogenes sgd.gaf --pval=0.05 --method=fdr_bh --pval_field=p_uncorrected --outfile=out_bon.xlsx --obo ../go.obo 
  fname <- paste0("../anne/linkcom/linkcom_res/pcc_elist_", 0.5, ".el")
  pcc <- suppressMessages(read_delim(fname, delim = " ", col_names = F))
#pcc <- read_delim("../anne/linkcom/pccwedgelist.el", delim = " ", col_names = F)
pcc_net <- graph_from_edgelist(as.matrix(pcc[c(1, 2)]), directed = F)
E(pcc_net)$linkcom <- pcc$X3

pcc %>% filter(X1=='dgr1'|X1=='ybr196c-a')
#pcc_clfiltered_nodes <- c(pcc_clfiltered$X1, pcc_clfiltered$X2) %>% unique()
# pcc_net_filtered <-  graph_from_edgelist(as.matrix(pcc_clfiltered[c(1,2)]),directed = F)
#pcc_net_filtered <- induced_subgraph(pcc_net, V(pcc_net)[V(pcc_net)$name %in% pcc_clfiltered_nodes])
#V(pcc_net)$name[V(pcc_net)$name %in% pgs.alleles] %>% map(function(x) neighborhood(pcc_net, nodes = x))
get.edgelist(ybr_sub)
ybr_sub <- induced_subgraph(pcc_net, vids = neighborhood(pcc_net, order = 1, nodes = "pex4") %>% unlist())
plot(ybr_sub, vertex.size=2,edge.color = E(ybr_sub)$linkcom, layout = layout.kamada.kawai)
E(ybr_sub)$linkcom %>% unique() # %>% E() #%>% magrittr::extract2('linkcom')#$linkcom 
write_graph(ybr_sub,format='graphml','~/Downloads/pex_sub.graphml')


net <- pcc_net

ybr <- neighborhood(net,order=1,nodes='ybr196c-a')
ybr_net <- induced_subgraph(net,ybr[[1]]) #%>% #plot(vertex.size=2,edge.color = E(.)$linkcom, layout = layout.kamada.kawai)
ybr_net
degree(net,'ybr196c-a')
exp.number.data %>% add.degree(net,colnames(exp.number.data)[2]) %>% filter(deg==degree(net,'ybr196c-a'))
#sample.nones('ybr196c-a',exp.number.data %>% add.degree(net,colnames(exp.number.data)[2]),colnames(exp.number.data)[2],dist=T,array = F)

hri <- neighborhood(net,order=1,nodes='sec13-1')
hrnet <- induced_subgraph(net,hri[[1]]) #%>%  plot(vertex.size=2,edge.color = E(.)$linkcom, layout = layout.kamada.kawai)

E(induced_subgraph(net,hri[[1]]))$linkcom %>% unique
E(induced_subgraph(net,ybr[[1]]))$linkcom %>% unique

library(RCy3)
induced_subgraph(net,ybr[[1]]) %>%createNetworkFromIgraph("myIgraph")
layoutNetwork('force-directed defaultSpringLength=70 defaultSpringCoefficient=0.000003')
#RColorBrewer::brewer.pal(length(ybr[[1]])-1,'Paired')
setEdgeColorMapping('linkcom', E(ybr_net)$linkcom %>% unique(),RColorBrewer::brewer.pal(length(ybr[[1]])-1,'Paired'),mapping.type = 'd')
setBackgroundColorDefault('#ffffff')
setEdgeLineWidthDefault(12)
setNodeColorBypass('ybr196c-a','#00CCCC')
setNodeSizeDefault(50)
setEdgeOpacityDefault(255)
#?setNodeOpacity
exportImage("analysis/figures/0812_poster/ybr_image_0814", "PNG",resolution=300, height = 4, width = 4,units='inches')

induced_subgraph(net,hri[[1]]) %>%createNetworkFromIgraph("hr")
layoutNetwork('force-directed defaultSpringLength=100 defaultSpringCoefficient=0.0000003')
setEdgeColorMapping('linkcom', E(hrnet)$linkcom %>% unique(),RColorBrewer::brewer.pal(length(hri[[1]])-1,'Paired'),mapping.type = 'd')
setNodeColorBypass('sec13-1','#ffa500')
exportImage("analysis/figures/0812_poster/sec_image_0814", "PNG",resolution=300, height = 4, width = 4,units='inches')

pgs_sub <- pgs.subnetwork(net,pgs_allele)
induced_subgraph(pgs_sub,names(components(pgs_sub)$membership[components(pgs_sub)$membership==1]))%>% createNetworkFromIgraph('pcc_pgs')
setNodeSizeMapping('cat',c('gene','proto-gene'),sizes=c(30,60),mapping.type = 'd')
layoutNetwork('kamada-kawai')# defaultSpringLength=100 defaultSpringCoefficient=0.0000003')
setEdgeColorDefault('#000000')
setBackgroundColorDefault('#ffffff')
setEdgeOpacityDefault(255)

fitContent()
exportImage("analysis/figures/0812_poster/pgs_sub", "PDF",resolution=300, height = 5, width = 5,units='inches')


oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.