This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(tidyverse) library(igraph) source("R/sample.nones.R") #ndlist <- read_delim('analysis/data/derived_data/pcc_calculations/nodelist_nonoverlap.txt',delim=' ',col_names = FALSE) #adj_res <- read_delim('analysis/data/derived_data/pcc_calculations/pccout_nonoverlap.txt',delim=' ',col_names=FALSE) ndlist <- read_delim('analysis/data/derived_data/pcc_calculations/allnodelist.txt',delim=' ',col_names = FALSE) adj_res <- read_delim('analysis/data/derived_data/pcc_calculations/allnodes_pccout.txt',delim=' ',col_names=FALSE) adj_res <- as.matrix(adj_res) colnames(adj_res) <- ndlist$X1 rownames(adj_res) <- ndlist$X1 #head(adj_res) adj_res_graph <- graph_from_adjacency_matrix(adj_res,weighted=TRUE,mode='undirected') g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp'))) g_final=delete.edges(g, which(E(g)$weight <.2)) g_final <- delete.vertices(g_final,which(degree(g_final)<1 )) genomic_neighbors <- read_delim("analysis/data/derived_data/neighbor_genes", delim = ",") colnames(genomic_neighbors) <- c("orf_name", "up", "down") pgs <- read_csv("~/Downloads/omersyntenyfilter.csv") # load("analysis/data/derived_data/df.pcc.Rdata") # saveRDS(df.pcc,'analysis/data/derived_data/df_pcc.rds') #df_pcc <- readRDS("analysis/data/derived_data/df_pcc.rds") overlappingorfs <- read.csv("../anne/network_analysis/overlappingorfs.csv", stringsAsFactors = FALSE)
ess.names <- read_csv("../anne/network_analysis/0530essential/ess_names.csv", col_names = F) ess.alleles <- read_csv("../anne/network_analysis/0530essential/ess_alleles.csv", col_names = F) strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") # df_pcc_filtered <- df_pcc %>% # filter(str_detect(X1, "supp") == F & str_detect(q, "supp") == F & X1 %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name] == F & q %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name] == F & # pcc >= 0.2) %>% # drop_na() pcc_net <- g#graph_from_data_frame(df_pcc_filtered, F) # E(pcc_net)$weight <- df_pcc_filtered$pcc pcc_net_simpl <- pcc_net# %>% igraph::simplify(edge.attr.comb = "mean") V(pcc_net_simpl)$sgd <- map_chr(V(pcc_net_simpl)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x])
i <- 1 genomic_neighbors <- genomic_neighbors %>% mutate(updata = NA, downdata = NA, upess = NA, downess = NA, geneess = NA) neig <- genomic_neighbors[i, c(2, 3)] %>% as.character() change_edgeformat <- function(gene, up, down) { v <- c() map(gene, function(x) append(x, )) } # subnetwork contatining gene with up and down neighbors s <- map(1:nrow(genomic_neighbors), function(i) ecount(induced_subgraph(pcc_net_simpl, V(pcc_net_simpl)[V(pcc_net_simpl)$sgd %in% (genomic_neighbors[i, ] %>% as.character())]))) for (i in 1:nrow(genomic_neighbors)) { # print(i) gallele <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd == genomic_neighbors$orf_name[i]] allelnamesup <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 2]] allelnamesdown <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 3]] c <- tidyr::crossing(allelnamesup, gallele) # %>% filter(allelnamesup %in% gallele & allelnamesup1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesup) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, as.character(c[t, ])) } # [1,] %>% as.character #E(pcc_net_simpl)[get.edge.ids(pcc_net_simpl, v)] genomic_neighbors[i,]$updata <- mean(edge_attr(pcc_net_simpl, "weight", get.edge.ids(pcc_net_simpl, v))) c <- tidyr::crossing(allelnamesdown, gallele) # %>% filter(allelnamesdown %in% gallele & allelnamesdown1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, as.character(c[t, ])) } # [1,] %>% as.character #E(pcc_net_simpl)[get.edge.ids(pcc_net_simpl, v)] genomic_neighbors[i,]$downdata <- mean(edge_attr(pcc_net_simpl, "weight", get.edge.ids(pcc_net_simpl, v))) genomic_neighbors[i, ]$upess <- ifelse(any(allelnamesup %in% ess.alleles$X1), T, F) genomic_neighbors[i, ]$downess <- ifelse(any(allelnamesdown %in% ess.alleles$X1), T, F) genomic_neighbors[i, ]$geneess <- ifelse(any(gallele %in% ess.alleles$X1), T, F) } genomic_neighbors[s > 1, ] %>% filter((updata>=0.1&downdata>=0.1)) neighboreffects=genomic_neighbors%>% filter((updata>=0.1|downdata>=0.1)&orf_name%in%strain_ids$`Systematic gene name`) #genomic_neighbors %>% saveRDS("analysis/data/derived_data/neighbor_pcc_data_withnewpcc.rds") #genomic_neighbors_all <- genomic_neighbors #genomic_neighbors_nonoverlap <- genomic_neighbors #genomic_neighbors_all%>% filter(orf_name%in%pgs$orf_name&(updata<0.1&downdata<0.1))
aaprot <- read_csv("~/Downloads/proto_novel") aafinal <- read_csv('~/Downloads/proto_genes',col_names = F) myaddition=pgs %>% filter(orf_name%in%aaprot$x==F) myremoval=aafinal %>% filter(X1%in%pgs$orf_name==F) aaprotfilteredwithme <- aaprot %>% filter(x%in%myremoval$X1==F) %>% bind_rows(data.frame(x=myaddition$orf_name)) t=genomic_neighbors[s>1,] %>% filter(orf_name %in% aaprotfilteredwithme$x & up %in% strain_ids$`Systematic gene name` & down %in% strain_ids$`Systematic gene name` & (updata < 0.1 & downdata < 0.1)) pgs=data.frame(orf_name=t$orf_name)
# V(pcc_net)$sgd <- map_chr(V(pcc_net)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) genomic_neighbors_pgs <- genomic_neighbors %>% filter(orf_name %in% pgs$orf_name) for (i in 1:nrow(genomic_neighbors_pgs)) { # print(i) gallele <- V(pcc_net)$name[V(pcc_net)$sgd == genomic_neighbors_pgs$orf_name[i]] allelnamesup <- V(pcc_net)$name[V(pcc_net)$sgd %in% genomic_neighbors_pgs[i, 2]] allelnamesdown <- V(pcc_net)$name[V(pcc_net)$sgd %in% genomic_neighbors_pgs[i, 3]] c <- tidyr::crossing(allelnamesup, gallele) # %>% filter(allelnamesup %in% gallele & allelnamesup1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesup) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, as.character(c[t, ])) } # [1,] %>% as.character # E(pcc_net)[get.edge.ids(pcc_net, v)] genomic_neighbors_pgs[i, ]$updata <- mean(edge_attr(pcc_net, "weight", get.edge.ids(pcc_net, v))) c <- crossing(allelnamesdown, gallele) # %>% filter(allelnamesdown %in% gallele & allelnamesdown1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnames) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, as.character(c[t, ])) } # [1,] %>% as.character # E(pcc_net)[get.edge.ids(pcc_net, v)] genomic_neighbors_pgs[i, ]$downdata <- mean(edge_attr(pcc_net, "weight", get.edge.ids(pcc_net, v))) } genomic_neighbors_pgs %>% filter((updata > 0.1 | downdata > 0.1) == F) %>% select(-mean) %>% drop_na() aafinal <- read_csv('~/Downloads/proto_genes',col_names = F) myaddition=pgs[pgs$orf_name%in%aaprot$x==F,] myremoval=aafinal %>% filter(X1%in%pgs$orf_name==F) aaprot <- read_csv("~/Downloads/proto_novel") %>% filter(x%in%myremoval$X1==F) %>% bind_rows(data_frame(x=myaddition$orf_name)) genomic_neighbors %>% filter(orf_name %in% aaprot$x & up %in% strain_ids$`Systematic gene name` & down %in% strain_ids$`Systematic gene name`) %>% #select(-mean) %>% drop_na() genomic_neighbors %>% filter(orf_name %in% aaprot$x & up %in% strain_ids$`Systematic gene name` & down %in% strain_ids$`Systematic gene name` & (updata > 0.1 | downdata > 0.1)) genomic_neighbors %>% filter(orf_name %in% aaprot$x & up %in% strain_ids$`Systematic gene name` & down %in% strain_ids$`Systematic gene name` & (updata > 0.1 | downdata > 0.1) == F) u=genomic_neighbors[s > 1, ] %>% filter(orf_name%in%pgs$orf_name&(updata<0.1&downdata<0.1)) t=genomic_neighbors[s>1,] %>% filter(orf_name %in% aaprot$x & up %in% strain_ids$`Systematic gene name` & down %in% strain_ids$`Systematic gene name` & (updata < 0.1 & downdata < 0.1)) pgs=data.frame(orf_name=t$orf_name)
# read noness and ess names # plot boxplot of updata and down data
source("R/add.degree.R") source("R/dist.sample.R") set.seed(1) allele.frame <- readRDS("analysis/data/derived_data/df_different_alleles.rds") # nones.exp.data <- read_csv('analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv') try <- allele.frame # [allele.frame$orf_name%in%summary$orf_name,] lengths <- lapply(try$different.alleles, length) try.multiple <- try[lengths > 1, ] try.single <- try[lengths == 1, ] strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") df_pcc_filtered <- df_pcc %>% filter(str_detect(X1, "supp") == F & str_detect(q, "supp") == F & X1 %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name] == F & q %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name] == F & pcc >= 0.2) %>% drop_na() library(igraph) pcc_net <- graph_from_data_frame(df_pcc_filtered, F) E(pcc_net)$weight <- df_pcc_filtered$pcc pcc_net_simpl <- pcc_net %>% igraph::simplify(edge.attr.comb = "mean") nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") %>% add.degree(pcc_net, colnames(.)[2]) V(pcc_net_simpl)$sgd <- map_chr(V(pcc_net_simpl)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) genomic_neighbors <- readRDS("analysis/data/derived_data/neighbor_pcc_data.rds") nw_dist_data <- data.frame() # V(pcc_net_simpl)$sgd <- map_chr(V(pcc_net_simpl)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) for (u in 1:10) { allnw.list <- c(unlist(try.single$different.alleles), unlist(lapply(try.multiple$different.alleles, sample, 1))) pcc_net_simpl <- pcc_net %>% igraph::simplify(edge.attr.comb = "mean") %>% induced.subgraph(., V(.)[V(.)$name %in% allnw.list]) V(pcc_net_simpl)$sgd <- map_chr(V(pcc_net_simpl)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) genomic_neighbors <- genomic_neighbors %>% mutate(nw_dist_down = NA, nw_dist_up = NA) for (i in 1:nrow(genomic_neighbors)) { gallele <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd == genomic_neighbors$orf_name[i]] allelnamesup <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 2]] allelnamesdown <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 3]] c <- tidyr::crossing(allelnamesup, gallele) %>% as.data.frame() # %>% filter(allelnamesup %in% gallele & allelnamesup1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesup) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, distances(pcc_net_simpl, c[t, 1], c[t, 2], weights = NA)) } genomic_neighbors[i, ]$nw_dist_up <- min(v, na.rm = T) c <- tidyr::crossing(allelnamesdown, gallele) %>% as.data.frame() # %>% filter(allelnamesdown %in% gallele & allelnamesdown1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, distances(pcc_net_simpl, c[t, 1], c[t, 2], weights = NA)) } genomic_neighbors[i, ]$nw_dist_down <- min(v, na.rm = T) } pgs_in_network <- pgs$orf_name[pgs$orf_name %in% V(pcc_net_simpl)$sgd] for (itr in 1:100) { pgs_nw_dist <- genomic_neighbors %>% filter(orf_name %in% pgs_in_network) %>% select(nw_dist_up, nw_dist_down) %>% drop_na() pgs_nw_dist <- c(pgs_nw_dist$nw_dist_up, pgs_nw_dist$nw_dist_down) nones_nw_dist <- genomic_neighbors %>% filter(orf_name %in% sample.nones(pgs_in_network, nones.exp.data[nones.exp.data$`Allele Gene name` %in% V(pcc_net_simpl)$name, ], colnames(nones.exp.data)[1], dist = F)) %>% select(nw_dist_up, nw_dist_down) %>% drop_na() nones_nw_dist <- c(nones_nw_dist$nw_dist_up, nones_nw_dist$nw_dist_down) # allnet.deg <- degree(pcc_net_simpl) # names(allnet.deg) <- V(pcc_net_simpl)$sgd # ess.sample <- allnet.deg[names(allnet.deg)%in%pgs_in_network]%>%hist(plot=F,breaks = -1:max(.))%>%.$counts%>%dist.sample(pcc_net_simpl,.,ess.alleles$X1) # ess.sample <- V(pcc_net_simpl)$sgd[V(pcc_net_simpl)$name%in%ess.sample] ess.sample <- sample(ess.names$X1, length(pgs_in_network)) ess_nw_dist <- genomic_neighbors %>% filter(orf_name %in% ess.sample) %>% select(nw_dist_up, nw_dist_down) %>% drop_na() ess_nw_dist <- c(ess_nw_dist$nw_dist_up, ess_nw_dist$nw_dist_down) nw_dist_data <- suppressWarnings(bind_rows(nw_dist_data, data.frame(data = mean(pgs_nw_dist[is.infinite(pgs_nw_dist) == F]), group = "proto-genes"), data.frame(data = mean(nones_nw_dist[is.infinite(nones_nw_dist) == F]), group = "nonessential"), data.frame(data = mean(ess_nw_dist[is.infinite(ess_nw_dist) == F]), group = "essential"))) } print(u) } nw_dist_data$group <- factor(nw_dist_data$group, levels = c("proto-genes", "nonessential", "essential")) ggplot(nw_dist_data, aes(x = group, fill = group, y = data)) + geom_boxplot() #+ ggpubr::stat_compare_means() # boxplot(pgs_nw_dist,nones_nw_dist,ess_nw_dist) # nw_dist_data_deg <- nw_dist_data saveRDS(nw_dist_data, "analysis/data/derived_data/nw_dist_data_nodeg.rds")
source("R/add.degree.R") strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") # df_pcc_filtered <- df_pcc %>% # filter(str_detect(X1, "supp") == F & str_detect(q, "supp") == F & X1 %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name] == F & q %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name] == F & # pcc >= 0.2) %>% # drop_na() library(igraph) pcc_net <- g_final#graph_from_data_frame(df_pcc_filtered, F) #E(pcc_net)$weight <- df_pcc_filtered$pcc pcc_net_simpl <- pcc_net #%>% igraph::simplify(edge.attr.comb = "mean") nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") %>% add.degree(pcc_net, colnames(.)[2]) V(pcc_net_simpl)$sgd <- map_chr(V(pcc_net_simpl)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) #genomic_neighbors <- readRDS("analysis/data/derived_data/neighbor_pcc_data.rds") # V(pcc_net_simpl)$sgd <- map_chr(V(pcc_net_simpl)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) genomic_neighbors <- genomic_neighbors %>% mutate(nw_dist_down = NA, nw_dist_up = NA) for (i in 1:nrow(genomic_neighbors)) { gallele <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd == genomic_neighbors$orf_name[i]] allelnamesup <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 2]] allelnamesdown <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 3]] c <- tidyr::crossing(allelnamesup, gallele) %>% as.data.frame() # %>% filter(allelnamesup %in% gallele & allelnamesup1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesup) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, distances(pcc_net_simpl, c[t, 1], c[t, 2], weights = NA)) } genomic_neighbors[i, ]$nw_dist_up <- min(v, na.rm = T) c <- tidyr::crossing(allelnamesdown, gallele) %>% as.data.frame() # %>% filter(allelnamesdown %in% gallele & allelnamesdown1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, distances(pcc_net_simpl, c[t, 1], c[t, 2], weights = NA)) } genomic_neighbors[i, ]$nw_dist_down <- min(v, na.rm = T) } nw_dist_data <- data.frame() pgs_in_network <- pgs$orf_name[pgs$orf_name %in% V(pcc_net_simpl)$sgd] set.seed(1) for (itr in 1:1) { pgs_nw_dist <- genomic_neighbors %>% filter(orf_name %in% pgs_in_network) %>% select(nw_dist_down, nw_dist_up) %>% #select(updata, downdata) %>% drop_na() pgs_nw_dist <- c(pgs_nw_dist$nw_dist_down, pgs_nw_dist$nw_dist_up) # pgs_nw_dist <- c(pgs_nw_dist$updata, pgs_nw_dist$downdata) nones_nw_dist <- genomic_neighbors %>% filter(orf_name %in% nones.exp.data$`Systematic gene name` & orf_name %in% pgs_in_network == F) %>% #select(updata, downdata) %>% select(nw_dist_down, nw_dist_up) %>% drop_na() #nones_nw_dist <- c(nones_nw_dist$updata, nones_nw_dist$downdata) nones_nw_dist <- c(nones_nw_dist$nw_dist_down, nones_nw_dist$nw_dist_up) ess_nw_dist <- genomic_neighbors %>% filter(orf_name %in% ess.names$X1) %>% select(nw_dist_down, nw_dist_up) %>% #select(updata, downdata) %>% drop_na() # ess_nw_dist <- c(ess_nw_dist$updata, ess_nw_dist$downdata) ess_nw_dist <- c(ess_nw_dist$nw_dist_down, ess_nw_dist$nw_dist_up) nw_dist_data <- suppressWarnings(bind_rows(nw_dist_data, data.frame(data = (pgs_nw_dist[is.infinite(pgs_nw_dist) == F]), group = "proto-genes"), data.frame(data = (nones_nw_dist[is.infinite(nones_nw_dist) == F]), group = "nonessential"), data.frame(data = (ess_nw_dist[is.infinite(ess_nw_dist) == F]), group = "essential"))) } nw_dist_data$group <- factor(nw_dist_data$group, levels = c("proto-genes", "nonessential", "essential")) library(ggsignif) library(coin) p12 <- pvalue(independence_test(data ~ group, data = (nw_dist_data %>% filter(group == "proto-genes" | group == "nonessential")))) p13 <- pvalue(independence_test(data ~ group, data = (nw_dist_data %>% filter(group == "proto-genes" | group == "essential")))) p23 <- pvalue(independence_test(data ~ group, data = (nw_dist_data %>% filter(group == "essential" | group == "nonessential")))) ggplot(nw_dist_data, aes(x = group, fill = group, y = data)) + geom_boxplot(outlier.shape = NA) + scale_x_discrete(labels = c('Proto-genes','Non-essential Genes','Essential Genes')) +theme_bw()+ ylab("PCC with neighbors") + xlab("") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(),legend.position = "none", axis.title.y = element_text(size = 25), axis.text.x = element_text(size=16)) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange")) +scale_y_continuous(breaks=c(-0.3,-0.2,-0.1, 0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9))#+geom_hline(yintercept = 0.1)#+ geom_signif( annotations = c(paste0("p=", formatC(p12, digits = 3)), paste0("p=", formatC(p13, digits = 3)), paste0("p=", formatC(p23, digits = 3))), y_position = c(max(nw_dist_data$data) +0.1, max(nw_dist_data$data) + 0.35, max(nw_dist_data$data) + 0.25), xmin = c(1, 1, 2), xmax = c(2, 3, 3) ) #+ ggpubr::stat_compare_means() # boxplot(pgs_nw_dist,nones_nw_dist,ess_nw_dist) # saveRDS(nw_dist_data,'analysis/data/derived_data/nw_dist_data_all.rds') # install.packages('coin') library(coin) independence_test(data ~ group, data = (nw_dist_data %>% filter(group == "proto-genes" | group == "nonessential"))) oneway_test(data ~ group, data = (nw_dist_data %>% filter(group == "nonessential" | group == "essential"))) # ,distribution=approximate(B=9999))
library(ggsignif) nw_dist_data <- readRDS('analysis/data/derived_data/nw_dist_data_all.rds') p12 <- pvalue(independence_test(data ~ group, data = (nw_dist_data %>% filter(group == "proto-genes" | group == "nonessential")))) p13 <- pvalue(independence_test(data ~ group, data = (nw_dist_data %>% filter(group == "proto-genes" | group == "essential")))) p23 <- pvalue(independence_test(data ~ group, data = (nw_dist_data %>% filter(group == "essential" | group == "nonessential")))) ggplot(nw_dist_data, aes(x = group, fill = group, y = data)) + geom_boxplot() + scale_x_discrete(labels = c('Proto-genes','Non-essential Genes','Essential Genes')) +theme_bw()+ ylab("Shortest distance") + xlab("") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(),legend.position = "none", axis.title.y = element_text(size = 25), axis.text.x = element_text(size=16)) + scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange")) + geom_signif( annotations = c(paste0("p=", formatC(p12, digits = 3)), paste0("p=", formatC(p13, digits = 3)), paste0("p=", formatC(p23, digits = 3))), y_position = c(max(nw_dist_data$data) + 1, max(nw_dist_data$data) + 3, max(nw_dist_data$data) + 2), xmin = c(1, 1, 2), xmax = c(2, 3, 3) )
nsim <- 1000 res <- numeric(nsim) ## set aside space for results ants <- nw_dist_data %>% mutate(group = ifelse(group == "proto-genes", "proto-genes", "nonessential")) ants <- nw_dist_data %>% filter(group == "nonessential" | group == "essential") for (i in 1:nsim) { ## standard approach: scramble response value perm <- sample(nrow(ants)) bdat <- transform(ants, data = data[perm]) ## compute & store difference in means; store the value res[i] <- mean(bdat[bdat$group == "nonessential", "data"]) - mean(bdat[bdat$group == "essential", "data"]) } obs <- mean(ants[ants$group == "nonessential", "data"]) - mean(ants[ants$group == "essential", "data"]) ## append the observed value to the list of results res <- c(res, obs) res hist(res, col = "gray", las = 1, main = "") abline(v = obs, col = "red") mean(abs(res) >= abs(obs), na.rm = T)
source("R/add.degree.R") strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv") #genomic_neighbors %>% saveRDS('analysis/data/derived_data/genomic_neighbors_withmypccall.rds') # df_pcc_filtered <- df_pcc %>% # filter(str_detect(X1, "supp") == F & str_detect(q, "supp") == F & X1 %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name] == F & q %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name] == F & # pcc >= 0.2) %>% # drop_na() # library(igraph) #g <- delete.vertices(adj_res_graph,which(str_detect(V(adj_res_graph)$name,'supp'))) V(g)$sgd <- map_chr(V(g)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) g_final_noneigh <- delete.vertices(g,V(g)$sgd%in%neighboreffects$orf_name) g_final_noneigh=delete.edges(g_final_noneigh, which(E(g_final_noneigh)$weight <.2)) g_final_noneigh <- delete.vertices(g_final_noneigh,which(degree(g_final_noneigh)<1 )) pcc_net <- g_final_noneigh#graph_from_data_frame(df_pcc_filtered, F) #E(pcc_net)$weight <- df_pcc_filtered$pcc pcc_net_simpl <- pcc_net# %>% igraph::simplify(edge.attr.comb = "mean") nones.exp.data <- read_csv("analysis/data/derived_data/strain_ids_with_experiment_count_nonessential.csv") %>% add.degree(pcc_net, colnames(.)[2]) V(pcc_net_simpl)$sgd <- map_chr(V(pcc_net_simpl)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) #genomic_neighbors <- readRDS("analysis/data/derived_data/neighbor_pcc_data.rds") # V(pcc_net_simpl)$sgd <- map_chr(V(pcc_net_simpl)$name, function(x) strain_ids$`Systematic gene name`[strain_ids$`Allele Gene name` == x]) genomic_neighbors <- genomic_neighbors %>% mutate(nw_dist_down = NA, nw_dist_up = NA) for (i in 1:nrow(genomic_neighbors)) { gallele <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd == genomic_neighbors$orf_name[i]] allelnamesup <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 2]] allelnamesdown <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 3]] c <- tidyr::crossing(allelnamesup, gallele) %>% as.data.frame() # %>% filter(allelnamesup %in% gallele & allelnamesup1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesup) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, distances(pcc_net_simpl, c[t, 1], c[t, 2], weights = NA)) } genomic_neighbors[i, ]$nw_dist_up <- min(v, na.rm = T) c <- tidyr::crossing(allelnamesdown, gallele) %>% as.data.frame() # %>% filter(allelnamesdown %in% gallele & allelnamesdown1 %in% gallele == F) if (nrow(c) == 0 | length(gallele) == 0 | length(allelnamesdown) == 0) next v <- c() for (t in 1:nrow(c)) { v <- append(v, distances(pcc_net_simpl, c[t, 1], c[t, 2], weights = NA)) } genomic_neighbors[i, ]$nw_dist_down <- min(v, na.rm = T) } all_ds <- distances(pcc_net_simpl,weights = NA) upper <- upper.tri(all_ds) for (i in 1:nrow(genomic_neighbors)) { gallele <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd == genomic_neighbors$orf_name[i]] allelnamesup <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 2]] allelnamesdown <- V(pcc_net_simpl)$name[V(pcc_net_simpl)$sgd %in% genomic_neighbors[i, 3]] all_ds[rownames(all_ds)%in%gallele, colnames(all_ds)%in%allelnamesdown] <- NA all_ds[rownames(all_ds)%in%gallele, colnames(all_ds)%in%allelnamesup] <- NA #genomic_neighbors[i, ]$nw_dist_down <- min(v, na.rm = T) } group_name <- pgs$orf_name dd <- pgs_nw_dist other <- all_ds[rownames(all_ds)%in%strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%group_name]] nw_dist_data_comparewneigh_pgs <- bind_rows(data.frame(group='neighbors',data=dd[is.na(dd)==F&is.infinite(dd)==F]),data.frame(group='everything',data=other[is.infinite(other)==F&is.na(other)==F])) %>% mutate(type='proto-gene') group_name <- dd <- nones_nw_dist other <- all_ds[rownames(all_ds)%in%nones.exp.data$`Allele Gene name` & rownames(all_ds) %in% strain_ids$`Allele Gene name`[strain_ids$`Systematic gene name`%in%pgs_in_network] == F] nw_dist_data_comparewneigh_nones <- bind_rows(data.frame(group='neighbors',data=dd[is.na(dd)==F&is.infinite(dd)==F]),data.frame(group='everything',data=other[is.infinite(other)==F&is.na(other)==F])) %>% mutate(type='nonessential') group_name <- pgs$orf_name dd <- ess_nw_dist other <- all_ds[rownames(all_ds)%in%ess.alleles$X1] nw_dist_data_comparewneigh_ess <- bind_rows(data.frame(group='neighbors',data=dd[is.na(dd)==F&is.infinite(dd)==F]),data.frame(group='everything',data=other[is.infinite(other)==F&is.na(other)==F])) %>% mutate(type='essential') #nw_dist_data_comparewneigh$group <- factor(nw_dist_data_comparewneigh$group,levels = c('neighbors','everything')) #png('analysis/figures/boxtry.png') comb_nw_data <- bind_rows(nw_dist_data_comparewneigh_pgs,nw_dist_data_comparewneigh_nones,nw_dist_data_comparewneigh_ess) comb_nw_data$group <- factor(comb_nw_data$group,levels = c('neighbors','everything')) comb_nw_data$type <- factor(comb_nw_data$type,levels=c('proto-gene','nonessential','essential')) p_pgs_neigh <- pvalue(independence_test(group~data,data=comb_nw_data %>% filter(type=='proto-gene'))) p_nones_neigh <- pvalue(independence_test(group~data,data=comb_nw_data %>% filter(type=='nonessential'))) p_ess_neigh <- pvalue(independence_test(group~data,data=comb_nw_data %>% filter(type=='essential'))) # ggplot(comb_nw_data,aes(x=type,y=data,color=group,fill=type))+geom_boxplot(outlier.shape = NA,fatten = 10)+scale_x_discrete(labels=c('Proto-gene','Nonessential Genes','Essential Genes'))+theme_bw()+ylab('Shortest Distance')+xlab('')+scale_fill_manual(values = c("#00BFC4", "#FF3300", "orange"))+scale_color_manual(name='',values=c('darkgrey','black'),labels=c('Distance to neighbors','Distance to others'))+guides(fill=FALSE)+ # theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # panel.background = element_blank(), axis.title.y = element_text(size = 25), axis.text.x = element_text(size=16)) #comb_nw_data %>% saveRDS('analysis/data/derived_data/comb_nw_data_withmypccall0620.rds') comb_nw_data <- readRDS('analysis/data/derived_data/comb_nw_data_withmypccall0620.rds') ggplot(comb_nw_data,aes(x=type,y=data,fill=group))+geom_boxplot()+scale_x_discrete(labels=c('Proto-gene','Nonessential Genes','Essential Genes'))+theme_bw()+ylab('Shortest Distance')+xlab('')+scale_fill_manual(name='',values = c("#af8dc3", "#7fbf7b"),labels=c('Distance to neighbors','Distance to others'))+guides(color=FALSE)+#scale_y_discrete(limits=c(0,20))+ theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.title.y = element_text(size = 25), axis.text.x = element_text(size=16),legend.text=element_text(size=16))#+geom_hline(yintercept = mean_distance(pcc_net)) library(ggpubr) # ggbarplot(comb_nw_data,x='type',y='data',fill='group',fill='group',add = 'mean_se',position = position_dodge()) # ##comb_nw_data %>% saveRDS('analysis/data/derived_data/comb_nw_data.rds') ggplot(comb_nw_data, aes(x=type,y=data,color=group,fill=group,group = as.factor(group))) + stat_summary(fun.y = mean, geom = "point",position = position_dodge(width=1)) + stat_summary(fun.data = mean_se, geom = "errorbar",position = position_dodge(width=1)) # library(ggsignif) # ggplot(nw_dist_data_comparewneigh,aes(x=group,y=data,fill=group))+geom_boxplot(fatten = 2) + scale_x_discrete(labels = c('to Neighbors','to Others')) +theme_bw()+ # ylab("Shortest distance") + xlab("") + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), # panel.background = element_blank(),legend.position = "none", axis.title.y = element_text(size = 25), axis.text.x = element_text(size=16)) + scale_fill_manual(values = c("#488f31", "#de425b")) + # geom_signif( # annotations = c(paste0("p=",formatC(p12_neigh,digits = 2))), # y_position = c(max(nw_dist_data_comparewneigh$data)+2), xmin = c(1), xmax = c(2) # ) #wilcox.test(c(genomic_neighbors$nw_dist_down,genomic_neighbors$nw_dist_up),all_ds[upper]) #dev.off()
nsim <- 1000 res <- numeric(nsim) ## set aside space for results #ants <- nw_dist_data %>% mutate(group = ifelse(group == "proto-genes", "proto-genes", "nonessential")) ants <- nw_dist_data_comparewneigh#nw_dist_data %>% filter(group == "nonessential" | group == "essential") for (i in 1:nsim) { ## standard approach: scramble response value perm <- sample(nrow(ants)) bdat <- transform(ants, data = data[perm]) ## compute & store difference in means; store the value res[i] <- mean(bdat[bdat$group == "everything", "data"]) - mean(bdat[bdat$group == "neighbors", "data"]) } obs <- mean(ants[ants$group == "everything", "data"]) - mean(ants[ants$group == "neighbors", "data"]) ## append the observed value to the list of results res <- c(res, obs) res hist(res, col = "gray", las = 1, main = "") abline(v = obs, col = "red") mean(abs(res) >= abs(obs), na.rm = T)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.