set.seed(1)
## Read Inputs files----------------
library(tidyverse)
library(igraph)
library(ggsignif)
library(cowplot)
library(data.table)
source("R/pgs.subnetwork.R")
source("R/add.degree.R")
source("R/sample.nones.R")
source("R/plot_data.R")
getwd()
#set.seed(1)
#pgs <- read_csv("~/Main/pgsNetwork/analysis/data/derived_data/filtering/costanzoplusmypcc_0815.csv")
#read_csv("~/Main/pgsNetwork/data/0701_proto-genes_list")
#pgs <- read_csv("~/Downloads/omersyntenyfilter.csv")
overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
costanzo=T
if(costanzo==F){
#read my pcc values
ndund1='analysis/data/derived_data/pcc_calculations_rcpp/nodelist_und1'
und1pcc <- 'analysis/data/derived_data/pcc_calculations_rcpp/und12avgpcc'
ndavgnorm <- 'analysis/data/derived_data/pcc_calculations_rcpp/ndlistavgpcc_calc_normalized_matrix.csv'
avgnorm <- 'analysis/data/derived_data/pcc_calculations_rcpp/avgpcc_calc_normalized_matrix.csv'
pcc0703 <- 'analysis/data/derived_data/pcc_calculations_rcpp/pcc_calc_0703'
ndlist0703 <- 'analysis/data/derived_data/pcc_calculations_rcpp/nodelist_0703'
pccnorm <- 'analysis/data/derived_data/pcc_calculations/pcc_calc_normalized_matrix.csv'
ndlistpccnorm <- 'analysis/data/derived_data/pcc_calculations/ndlistpcc_calc_normalized_matrix.csv'
# ndlist <- fread(ndlist_fname,header=F)
# adj_res <- fread(adj_res_fname,header=F)
adj_res <- fread(pcc0703,header=F)
ndlist <- fread(ndlist0703,header=F)
adj_res <- as.matrix(adj_res)
colnames(adj_res) <- ndlist$V1
rownames(adj_res) <- ndlist$V1
adj_res_graph <- graph_from_adjacency_matrix(adj_res,weighted=TRUE,mode='undirected',diag=F)
g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp')|V(adj_res_graph)$name%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]))
g_final=delete.edges(g, which(E(g)$weight <.2|is.na(E(g)$weight)|is.nan(E(g)$weight)))
g_final <- delete.vertices(g_final,which(degree(g_final)<1 ))
pcc.net <- g_final#graph_from_data_frame(df.pcc,F)
#E(pcc.net)$weight <- df.pcc$pcc
nw.del<-igraph::simplify(pcc.net,remove.loops = T)
}else{
costanzo_matrix_data <- fread('analysis/data/raw_data//pcc_ALL.txt',header=T)
costanzo_matrix <- costanzo_matrix_data[-1,-c(1,2)]%>%as.matrix()
rownames(costanzo_matrix) <- colnames(costanzo_matrix)
class(costanzo_matrix) <- "numeric"
adj_res_graph <- graph_from_adjacency_matrix(costanzo_matrix,weighted=TRUE,mode='undirected',diag=F)
g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp')|V(adj_res_graph)$name%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%overlappingorfs$orf_name]))#
# load('~/Main/anne/network_analysis/df.pcc.Rdata')
# df.pcc <- df.pcc %>% filter((pcc)>=0.2) %>% drop_na()
g_final=delete.edges(g, which(E(g)$weight <.2|is.na(E(g)$weight)|is.nan(E(g)$weight)))
g_final <- delete.vertices(g_final,which(degree(g_final)<1 ))
pcc.net <- g_final#graph_from_data_frame(df.pcc,F)
# E(pcc.net)$weight <- df.pcc$pcc
nw.del<-igraph::simplify(pcc.net,remove.loops = T)
}
allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds")
nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv")
exp.number.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_all.csv")%>% mutate(group=ifelse(`Systematic gene name`%in%pgs$orf_name,'proto-gene',maincat))
try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,]
lengths <- lapply(try$different.alleles, length)
try.multiple <- try[lengths > 1, ]
try.single <- try[lengths == 1, ]
ess.names <- read_csv("../anne/network_analysis/0530essential/ess_names.csv", col_names = F)
essentials <- exp.number.data$`Allele Gene name`[exp.number.data$maincat=='essential']
#read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) %>% dplyr::select(X1) %>% pull()
# Simulation -----------
enm_res <- read_tsv('analysis/data/derived_data/enm/sqfs_500modes.tsv',col_names = c('gene1','sqfs'))
sim_n=10
rand_enm <- vector("list",sim_n)
rand_ess_enm <- vector("list",sim_n)
pgs_enm <- vector("list",sim_n)
colname = colnames(nones.exp.data)[2]
for (i in 1:sim_n) {
allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1)))
pgs_allele <- strain_ids %>%
filter(`Systematic gene name` %in% pgs$orf_name) %>%
select(`Allele Gene name`) %>%
pull()
allnet <- induced.subgraph(nw.del, V(nw.del)[V(nw.del)$name%in%allnw.list]) %>%
delete.vertices(which(degree(.)==0))
nones.exp.data <- nones.exp.data %>%
add.degree(allnet, colname) %>%
dplyr::mutate(int_density = deg / num)
exp.number.data.filtered <- exp.number.data %>%
add.degree(allnet, colname) %>%
mutate(int_density = deg / num) %>%
filter(`Allele Gene name` %in% allnw.list)
allnet.deg <- degree(allnet)
allnet.btw <- betweenness(allnet, weights = NA,normalized = T)
# allnet.eig <- eigen_centrality(allnet)$vector
allnet.trans <- transitivity(allnet,'local')
names(allnet.trans) <- names(allnet.deg)
# nn<-pgs.subnetwork(allnet, pgs$orf_name)
# giant_comp_nn <- induced_subgraph(nn,V(nn)[components(nn)$membership==which.max(components(nn)$csize)])
# giant_nodes <- append(giant_nodes,vcount(giant_comp_nn))
# giant_edges <- append(giant_edges,ecount(giant_comp_nn))
for (t in 1:100) {
#s <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=F)
s.dist <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name], nones.exp.data %>% filter(!!sym(colname)%in%V(allnet)$name), colname,dist=T)
#ess.s <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs_allele[pgs_allele%in% V(allnet)$name])),colname,array=FALSE,dist=F)
ess.s.dist <- sample.nones(pgs_allele[pgs_allele %in% V(allnet)$name],exp.number.data.filtered %>% filter(!!sym(colname)%in%V(allnet)$name& (!!sym(colname)%in%essentials)|(!!sym(colname)%in%pgs_allele[pgs_allele %in% V(allnet)$name])),colname,array=FALSE,dist=T)
rand_enm[[i]][t] <- list(enm_res %>% filter(gene1%in%s.dist) %>% select(sqfs) %>% pull())
pgs_enm[[i]][t] <- list(enm_res %>% filter(gene1%in%pgs_allele) %>% select(sqfs) %>% pull())
rand_ess_enm[[i]][t] <- list(enm_res %>% filter(gene1%in%ess.s.dist) %>% select(sqfs) %>% pull())
}
print(i)
}
rand_enm_mean <- map(rand_enm,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_enm <- unlist(rand_enm)
rand_ess_enm_mean <- map(rand_ess_enm,function(x) map(x,mean, na.rm=T)) %>% unlist()
rand_ess_enm <- unlist(rand_ess_enm)
pgs_enm_mean <- map(pgs_enm,function(x) map(x,mean, na.rm=T)) %>% unlist()
pgs_enm <- unlist(pgs_enm)
## r enm-betweenness-trans plots------------
d1=plot_data(pgs_enm_mean,rand_enm_mean,rand_ess_enm_mean,pgs_enm,rand_enm,rand_ess_enm,title = 'enm',binwidth=0.1)[[1]]
d1
#plot_grid(d1,d2,d3,ncol=3)
## r enm-betweenness-trans plots ecdf-----------
d1=plot_data_ecdf(pgs_enm,rand_enm,rand_ess_enm,title = 'enm',binwidth=0.1,logscale = T)
strain_ids <- read_csv('analysis/data/derived_data/strain_ids_with_experiment_count_all.csv') %>% mutate(group = ifelse(`Systematic gene name`%in%pgs$orf_name,'proto-gene',maincat))
enm_cat <- enm_res %>% filter(gene1%in%strain_ids$`Allele Gene name`) %>% left_join(strain_ids,by=c('gene1'='Allele Gene name'))
ggplot(enm_cat,aes(x=group,y=sqfs))+stat_summary(fun.y='mean',geom='bar')+stat_summary(fun.data='mean_se',geom='errorbar')
#ggplot(enm_cat,aes(x=sqfs,y=..density..,color=group))+geom_freqpoly(binwidth=0.5)
ggplot(enm_cat,aes(x=sqfs))+geom_bar(stat="density" )
p_ess_pgs = coin::independence_test(group~sqfs,enm_cat%>%filter(group!='nonessential')%>%mutate(group=factor(group)))
p_nones_pgs = coin::independence_test(group~sqfs,enm_cat%>%filter(group!='essential'&gene1!='yil086c'&gene1!='ylr412c-a')%>%mutate(group=factor(group)))
p_ess_nones = coin::independence_test(group~sqfs,enm_cat%>%filter(group!='proto-gene')%>%mutate(group=factor(group)))
## compare node by node
net = nw.del
all_deg = degree(net)
pgs_data_list = vector('numeric', length(pgs_allele))
compared_mean_list = vector('numeric', length(pgs_allele))
pgs_deg = vector('numeric',length(pgs_allele))
se_list = vector('numeric',length(pgs_allele))
for(i in seq_along(pgs_allele)){
orf_name = pgs_allele[i]
if(orf_name%in%V(net)$name==F) next
orf_deg = all_deg[names(all_deg)==orf_name]
same_deg_list = names(all_deg)[all_deg==orf_deg&names(all_deg)!=orf_name&names(all_deg)%in%strain_ids$`Allele Gene name`[strain_ids$group=='nonessential']]
orf_sqfs = enm_res %>% filter(gene1 == orf_name) %>% select(sqfs) %>% pull()
same_deg_sqfs =same_deg_mean_sqfs = enm_res %>% filter(gene1%in%same_deg_list) %>% select(sqfs) %>% pull()
same_deg_mean_sqfs = same_deg_sqfs %>% mean(na.rm=T)
se = sd(same_deg_sqfs)/length(same_deg_sqfs)
pgs_data_list[i]=orf_sqfs
compared_mean_list[i]=same_deg_mean_sqfs
pgs_deg[i] = orf_deg
se_list[i] = se
}
df = tibble(orf_name = pgs_allele, sqfs = pgs_data_list, mean = compared_mean_list, se = se_list, degree = pgs_deg)
ggplot(df %>% filter(degree>1),aes(x=orf_name))+
geom_point(aes(y=sqfs),stat='identity',color='red')+
geom_point(aes(y=mean), stat="identity", color="black",
position=position_dodge(.9)) +
geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=.2,
position=position_dodge(.9))# +ylim(NA,2)
ggsave('analysis/figures/0904_enm/enm_compare_meanse.pdf',width = 20,height = 12)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.