# R/subgraph_plots_cytoscape.R

library(tidyverse)
library(igraph)
library(RCy3)


## PCC Input-----------
library(ggsignif)
library(cowplot)
library(data.table)
source("R/pgs.subnetwork.R")
source("R/add.degree.R")
source("R/sample.nones.R")
source("R/plot_data.R")
# Show the working directory: every path used below is relative and is
# resolved against it.
getwd()
# set.seed(1)
# Earlier ways of loading the proto-gene table `pgs`, kept for reference (all
# of them are disabled):
# pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv")
# read_csv("~/Main/pgsNetwork/data/0701_proto-genes_list")
# pgs <- read_csv("~/Downloads/omersyntenyfilter.csv")

# ORFs listed in this file are excluded from both networks built below.
overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
# Lookup table; the columns `Systematic gene name` and `Allele Gene name` are
# used below to translate between the two naming schemes.
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")

# Fail fast: `pgs$orf_name` is needed throughout the rest of the script, but
# nothing in this script assigns `pgs` any more. Stop here with a clear message
# instead of failing later, after the expensive network construction.
if (!exists("pgs")) {
  stop(
    "`pgs` (a table with an `orf_name` column) is not defined. ",
    "Load it before running this script, e.g. by re-enabling one of the ",
    "read_csv() calls above.",
    call. = FALSE
  )
}
# Build the profile-similarity (PCC) network `nw.del`.
#
# `costanzo` selects where the PCC matrix comes from:
#   TRUE  - the matrix stored in analysis/data/raw_data/pcc_ALL.txt
#   FALSE - the locally computed matrix (pcc_calc_0703 + nodelist_0703)
# Only the construction of `adj_res_graph` differs between the two sources;
# the filtering afterwards is shared.
costanzo <- TRUE
if (!costanzo) {
  # Locations of locally computed PCC matrices and their node lists. Only
  # `pcc0703` / `ndlist0703` are read; the others are kept as alternatives.
  ndund1 <- "analysis/data/derived_data/pcc_calculations_rcpp/nodelist_und1"
  und1pcc <- "analysis/data/derived_data/pcc_calculations_rcpp/und12avgpcc"
  ndavgnorm <- "analysis/data/derived_data/pcc_calculations_rcpp/ndlistavgpcc_calc_normalized_matrix.csv"
  avgnorm <- "analysis/data/derived_data/pcc_calculations_rcpp/avgpcc_calc_normalized_matrix.csv"
  pcc0703 <- "analysis/data/derived_data/pcc_calculations_rcpp/pcc_calc_0703"
  ndlist0703 <- "analysis/data/derived_data/pcc_calculations_rcpp/nodelist_0703"
  pccnorm <- "analysis/data/derived_data/pcc_calculations/pcc_calc_normalized_matrix.csv"
  ndlistpccnorm <- "analysis/data/derived_data/pcc_calculations/ndlistpcc_calc_normalized_matrix.csv"

  adj_res <- as.matrix(fread(pcc0703, header = FALSE))
  ndlist <- fread(ndlist0703, header = FALSE)

  # The matrix file has no labels; the node list supplies them for both axes.
  colnames(adj_res) <- ndlist$V1
  rownames(adj_res) <- ndlist$V1

  adj_res_graph <- graph_from_adjacency_matrix(
    adj_res,
    weighted = TRUE, mode = "undirected", diag = FALSE
  )
} else {
  costanzo_matrix_data <- fread("analysis/data/raw_data//pcc_ALL.txt", header = TRUE)
  # Drop the first data row and the first two columns, then reuse the remaining
  # column names as row names.
  # NOTE(review): presumably the dropped row/columns hold extra labels and the
  # matrix is symmetric with identical row/column order -- confirm.
  costanzo_matrix <- costanzo_matrix_data[-1, -c(1, 2)] %>% as.matrix()
  rownames(costanzo_matrix) <- colnames(costanzo_matrix)
  # Coerce the cell values to numeric while keeping the matrix shape.
  class(costanzo_matrix) <- "numeric"

  adj_res_graph <- graph_from_adjacency_matrix(
    costanzo_matrix,
    weighted = TRUE, mode = "undirected", diag = FALSE
  )
}

# Remove vertices whose name contains "supp" and vertices that are alleles of
# an ORF listed in `overlappingorfs`.
vertex_names <- V(adj_res_graph)$name
overlapping_alleles <- strain_ids$`Allele Gene name`[
  strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name
]
g <- delete_vertices(
  adj_res_graph,
  which(str_detect(vertex_names, "supp") | vertex_names %in% overlapping_alleles)
)

# Keep only edges with a usable weight of at least 0.2 (is.na() is also TRUE
# for NaN, so a separate is.nan() test is unnecessary).
edge_weights <- E(g)$weight
g_final <- delete_edges(g, which(is.na(edge_weights) | edge_weights < 0.2))

# Drop vertices left without any edge after the weight filter.
g_final <- delete_vertices(g_final, which(degree(g_final) < 1))
pcc.net <- g_final

# igraph:: prefix is required: purrr (attached by tidyverse) also exports
# simplify().
nw.del <- igraph::simplify(pcc.net, remove.loops = TRUE)



# Allele and experiment-count tables used alongside the PCC network.
allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
# `group` is "proto-gene" for ORFs listed in `pgs`, otherwise the value of
# `maincat`.
exp.number.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv") %>%
  mutate(group = ifelse(`Systematic gene name` %in% pgs$orf_name, "proto-gene", maincat))

# NOTE: `try` and `lengths` shadow base functions of the same name; the names
# are kept because the rest of the script refers to them.
try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,]
# Number of entries in `different.alleles` per row, as an integer vector
# (base::lengths() replaces lapply(..., length), which produced a list).
lengths <- base::lengths(try$different.alleles)
try.multiple <- try[lengths > 1, ]
try.single <- try[lengths == 1, ]

# NOTE(review): this path points outside the project directory -- confirm it
# is still the intended source.
ess.names <- read_csv("../anne/network_analysis/0530essential/ess_names.csv", col_names = FALSE)
essentials <- exp.number.data$`Allele Gene name`[exp.number.data$maincat == "essential"]


## PCC Plot-----------

# Allele names of all strains whose systematic gene name is listed in `pgs`.
pgs_allele <- strain_ids %>%
  filter(`Systematic gene name` %in% pgs$orf_name) %>%
  pull(`Allele Gene name`)

# The PCC network built above is the network to plot.
net <- nw.del

pgs_sub <- pgs.subnetwork(net, pgs_allele)

# Send only the vertices of component number 1 to Cytoscape.
# NOTE(review): component 1 is not necessarily the largest component --
# confirm this is the intended selection.
pcc_membership <- components(pgs_sub)$membership
createNetworkFromIgraph(
  induced_subgraph(pgs_sub, names(pcc_membership[pcc_membership == 1])),
  "pcc_pgs"
)

# Node size is driven by the `cat` column: 30 for "gene", 60 for "proto-gene".
setNodeSizeMapping(
  "cat",
  c("gene", "proto-gene"),
  sizes = c(30, 60),
  mapping.type = "d"
)
layoutNetwork("kamada-kawai") # defaultSpringLength=100 defaultSpringCoefficient=0.0000003')

# Black, fully opaque edges on a white background.
setEdgeColorDefault("#000000")
setBackgroundColorDefault("#ffffff")
setEdgeOpacityDefault(255)

fitContent()
exportImage(
  "analysis/figures/0821_paper/pgs_sub",
  "PDF",
  resolution = 300,
  units = "inches",
  width = 5,
  height = 5
)



# Interaction Input
# Inputs for the genetic-interaction network (re-read here so this section can
# be run without the PCC section above).
overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
net.df <- readRDS("analysis/data/derived_data/SGA_data_combined.rds.gz")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")

# Keep interactions with P-value <= 0.05 in which neither the query nor the
# array strain ID appears in the overlapping-ORF list.
net_df_significant <- net.df %>%
  filter(
    `P-value` <= 0.05,
    !(`Query Strain ID` %in% overlappingorfs$orf_name),
    !(`Array Strain ID` %in% overlappingorfs$orf_name)
  )

# Of those, keep interactions whose score is at most -0.2.
net_df_significant_sl <- net_df_significant %>%
  filter(`Genetic interaction score (ε)` <= -0.2) # & `Double mutant fitness standard deviation`<=0.1)

# Allele and experiment-count tables used alongside the interaction network.
allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
# `group` is "proto-gene" for ORFs listed in `pgs`, otherwise the value of
# `maincat`.
exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv") %>%
  mutate(group = ifelse(`Systematic gene name` %in% pgs$orf_name, "proto-gene", maincat))

# NOTE: `try` and `lengths` shadow base functions of the same name; the names
# are kept because the rest of the script refers to them.
try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,]
# Number of entries in `different.alleles` per row, as an integer vector
# (base::lengths() replaces lapply(..., length), which produced a list).
lengths <- base::lengths(try$different.alleles)
try.multiple <- try[lengths > 1, ]
try.single <- try[lengths == 1, ]

# Systematic names of all genes whose `maincat` is "essential".
ess.names <- exp.data$`Systematic gene name`[exp.data$maincat == "essential"]

# Undirected interaction graph; columns 1 and 3 of the filtered table supply
# the two endpoints of every edge.
net_edgelist <- as.matrix(net_df_significant_sl[, c(1, 3)])
net <- graph_from_edgelist(net_edgelist, directed = FALSE)

pgs_sub <- pgs.subnetwork(net, pgs$orf_name)

# Send only the vertices of component number 1 to Cytoscape.
# NOTE(review): component 1 is not necessarily the largest component --
# confirm this is the intended selection.
int_membership <- components(pgs_sub)$membership
createNetworkFromIgraph(
  induced_subgraph(pgs_sub, names(int_membership[int_membership == 1])),
  "int_pgs"
)

# Node size is driven by the `cat` column: 50 for "gene", 60 for "proto-gene".
setNodeSizeMapping(
  "cat",
  c("gene", "proto-gene"),
  sizes = c(50, 60),
  mapping.type = "d"
)
layoutNetwork("kamada-kawai") # defaultSpringLength=100 defaultSpringCoefficient=0.0000003')

# Black, semi-transparent edges (opacity 100) on a white background.
setEdgeColorDefault("#000000")
setBackgroundColorDefault("#ffffff")
setEdgeOpacityDefault(100)

fitContent()
exportImage(
  "analysis/figures/0821_paper/pgs_sub_int_2",
  "PDF",
  resolution = 300,
  units = "inches",
  width = 4,
  height = 2
)

# Histogram (bin width 1) of the `deg` entries whose names are listed in
# `pgs$orf_name`, saved as a 1.5 x 1.5 PDF.
# NOTE(review): `deg` is not assigned anywhere in the visible script --
# presumably a named degree vector created elsewhere; confirm before running.
# The constant `fill = "white"` inside aes() is only a label for the single
# fill level; the bar colour comes from scale_fill_manual().
degree_histogram <- ggplot() +
  geom_histogram(aes(x = deg[names(deg) %in% pgs$orf_name], fill = "white"), binwidth = 1) +
  theme_classic() +
  theme(legend.position = "none") +
  scale_fill_manual(values = c("#1CBDC2"))

ggsave(
  "analysis/figures/0821_paper/histogram.pdf",
  plot = degree_histogram,
  width = 1.5,
  height = 1.5
)
# oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.