# R/clustering_input.R

library(tidyverse)
library(igraph)
library(data.table)
# Build the clustering input edge list from a PCC matrix ----
#
# Steps: read the input tables, turn the PCC matrix into a weighted undirected
# graph, drop unwanted vertices and weak/missing edges, and write the remaining
# edges (one pair of vertex names per line) to a headerless TSV file.

# Minimum edge weight an edge must have to be kept in the network.
min_pcc <- 0.2

overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
costanzo_matrix_data <- fread(
  "analysis/data/raw_data/pcc_ALL.txt",
  header = TRUE
)

# Drop the first row and the first two columns before converting to a matrix
# (presumably annotation/label cells rather than PCC values -- TODO confirm).
costanzo_matrix <- costanzo_matrix_data[-1, -c(1, 2)] %>%
  as.matrix()
# Row names are copied from the column names; this assumes the remaining matrix
# is square with rows in the same order as the columns -- TODO confirm.
rownames(costanzo_matrix) <- colnames(costanzo_matrix)
# Coerce the cell values to numeric while keeping dimensions and dimnames.
class(costanzo_matrix) <- "numeric"

adj_res_graph <- graph_from_adjacency_matrix(
  costanzo_matrix,
  weighted = TRUE,
  mode = "undirected",
  diag = FALSE
)

# Vertices to remove: names containing "supp", plus allele names whose
# systematic gene name is listed in the overlapping-ORF table.
vertex_names <- V(adj_res_graph)$name
overlapping_alleles <- strain_ids$`Allele Gene name`[
  strain_ids$`Systematic gene name` %in% overlappingorfs$orf_name
]
is_excluded <- str_detect(vertex_names, "supp") |
  vertex_names %in% overlapping_alleles
g <- delete_vertices(adj_res_graph, which(is_excluded))

# Remove edges below the threshold or with a missing weight. `is.na()` is also
# TRUE for NaN, so no separate `is.nan()` test is needed.
edge_weights <- E(g)$weight
g_final <- delete_edges(
  g,
  which(edge_weights < min_pcc | is.na(edge_weights))
)

# Drop vertices that are left without any edge.
g_final <- delete_vertices(g_final, which(degree(g_final) < 1))
pcc.net <- g_final
nw.del <- igraph::simplify(pcc.net, remove.loops = TRUE)

# Columns 4 and 5 of the long data frame are selected by position; they are
# expected to be the two endpoint vertex names, given a graph with only a
# `weight` edge attribute and a `name` vertex attribute -- verify if further
# attributes are ever added.
nw.del %>%
  as_long_data_frame() %>%
  as_tibble() %>%
  select(4:5) %>%
  write_tsv(
    "analysis/data/derived_data/clustering_input_pcc",
    col_names = FALSE
  )
# oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.