library(tidyverse)
library(rtracklayer)
library(gower)
library(dendextend)
library(cluster)
library(StatMatch)
library(gplots)
set.seed(1)
overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
net.df <- readRDS("analysis/data/derived_data/SGA_data_combined.rds.gz")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
net_df_significant <- filter(net.df, `P-value` <= 0.05 & `Query Strain ID` %in% overlappingorfs$orf_name == FALSE & `Array Strain ID` %in% overlappingorfs$orf_name == FALSE)
net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.35)
go <- read_csv('~/Downloads/GO_hits.csv')
scer <- import(con = "~/Downloads/ucscgenes.bed")
#smorfsAll <- read_csv("ANNE2-28-19-ref_genes.csv") %>% filter(is.na(overlap_refgene) == T)
overlappingNongenes <- read.csv("~/Main/anne/overlappingNongenes.csv")
overlappingNongenes <- overlappingNongenes$x
summ <- read_csv("~/Main/SUMMARY_2012.csv")
summ.gene <- summ$orf_name[summ$category == "gene"]
synaldata <- read_csv("/media/oma21/Files/alldata/alldata/heatmapdata.csv")
pgs <- read_csv('~/Main/pgsNetwork/data/0701_proto-genes_list')
genesDf <- read_csv("~/Main/anne/networkSynal/geneheatmap.csv") %>% mutate(annotation = "gene")
rowsam <- sample(1:1030, 100)
genesDf <- genesDf[rowsam, ]
genesDf[, c(3, 4, 5, 6)] <- lapply(genesDf[, c(3, 4, 5, 6)], as.numeric)
nongenesDf <- read_csv("~/Main/anne/networkSynal/nongeneheatmap.csv") %>%
mutate(annotation = "nongenes") %>%
filter(orf_name %in% overlappingNongenes == F)
nongenesDf <- nongenesDf[sample(1:nrow(nongenesDf), 100),]
nongenesDf[, c(3, 4, 5, 6)] <- lapply(nongenesDf[, c(3, 4, 5, 6)], as.numeric)
heatmapfornetwork <- synaldata %>%
filter(orf_name %in% pgs$orf_name) %>%
dplyr::mutate(annotation = "proto-gene")
colnames(heatmapfornetwork) <- colnames(nongenesDf)
# heatmapfornetwork <- ifelse(heatmapfornetwork==1,TRUE,FALSE)
P_matrix <- bind_rows(nongenesDf, heatmapfornetwork, genesDf) # heatmapfornetwork[,-c(1,2)]heatmapfornetwork#%>%filter(orf_name%in%diff.name)#
P_matrix1 <- P_matrix[, -c(1, 2, 27)]
gower_dist <- gower.dist(as.data.frame(P_matrix1))
P_matrix2 <- as.data.frame(P_matrix1)
P_matrix2[, c(5, 6, 7, 8, 9, 10, 11, 12)] <- lapply((P_matrix2[, c(5, 6, 7, 8, 9, 10, 11, 12)]), as.factor)
weights <- c(
0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5,
2, 4, 6, 8,
0.5, 0.5, 0.5, 0.5,
2, 4, 6, 8
)
# weights <- c(0.5,0.5,0.5,0.5,
# 0,0,0,0,
# 0,0,0,0,
# 2,4,6,8,
# 0.5,0.5,0.5,0.5,
# 2,4,6,8)
# weights <- c(0.5,0.5,0.5,0.5,
# 1,1,1,1,
# 1,1,1,1,
# 0,0,0,0,
# 0.5,0.5,0.5,0.5,
# 1,1,1,2)
gw <- daisy(as.data.frame(P_matrix2), metric = "gower", weights = weights)
# pam_fit <- pam(gw, diss = TRUE, k = 2)
# clusplot(pam_fit)
# cluster the distance matrix with agnes. I use 'complete' or 'ward' linkage methods depending on the clusters and goals
agn1 <- agnes(gw, method = "ward")
# create dendrogram
dend1_ <- as.dendrogram(agn1)
#cols_branches <- c("red", "gold", "violet", "darkseagreen", "purple", "darkblue")
#dend1_ <- color_branches(dend1_, k = length(cols_branches), col = cols_branches)
{
P_matrix1$`start codon in Para`[P_matrix1$`start codon in Para` != "M" ] <- 0 # & P_matrix1$`start codon in Para`!='I'
P_matrix1$`start codon in Mik`[P_matrix1$`start codon in Mik` != "M"] <- 0
P_matrix1$`start codon in Para`[P_matrix1$`start codon in Para` == "M"] <- 1
P_matrix1$`start codon in Mik`[P_matrix1$`start codon in Mik` == "M"] <- 1
# P_matrix1$`start codon in Para`[P_matrix1$`start codon in Para`=='I']<-0.5
# P_matrix1$`start codon in Mik`[P_matrix1$`start codon in Mik`=='I']<-0.5
P_matrix1$`stop codon in Para`[P_matrix1$`stop codon in Para` != "X"] <- 0
P_matrix1$`stop codon in Mik`[P_matrix1$`stop codon in Mik` != "X"] <- 0
P_matrix1$`stop codon in Para`[P_matrix1$`stop codon in Para` == "X"] <- 1
P_matrix1$`stop codon in Mik`[P_matrix1$`stop codon in Mik` == "X"] <- 1
P_matrix1$`start codon in Bay`[P_matrix1$`start codon in Bay` != "M" ] <- 0 # & P_matrix1$`start codon in Bay`!='I'
P_matrix1$`start codon in Kud`[P_matrix1$`start codon in Kud` != "M" ] <- 0 # & P_matrix1$`start codon in Kud`!='I'
P_matrix1$`start codon in Bay`[P_matrix1$`start codon in Bay` == "M"] <- 1
P_matrix1$`start codon in Kud`[P_matrix1$`start codon in Kud` == "M"] <- 1
# P_matrix1$`start codon in Bay`[P_matrix1$`start codon in Bay`=='I']<-0.5
# P_matrix1$`start codon in Kud`[P_matrix1$`start codon in Kud`=='I']<-0.5
P_matrix1$`stop codon in Bay`[P_matrix1$`stop codon in Bay` != "X"] <- 0
P_matrix1$`stop codon in Kud`[P_matrix1$`stop codon in Kud` != "X"] <- 0
P_matrix1$`stop codon in Bay`[P_matrix1$`stop codon in Bay` == "X"] <- 1
P_matrix1$`stop codon in Kud`[P_matrix1$`stop codon in Kud` == "X"] <- 1
}
library(RColorBrewer)
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(12)
# cols <- rep('black', nrow(P_matrix))
# cols[(P_matrix$orf_name) %in%heatmapfornetwork$orf_name] <- 'red'
# cols[(P_matrix$orf_name) %in%filtered$orf_name] <- 'red'
# to rotate the dendrogram. see ?click_rotate
P_matrix1 <- as.data.frame(P_matrix1)
rownames(P_matrix1) <- P_matrix$orf_name
# dend1_<-click_rotate(dend1_)
gene_col <- ifelse(P_matrix$annotation == "proto-gene", "lightblue", ifelse(P_matrix$annotation == "nongenes", "blue", "white"))
# ifelse(P_matrix$orf_name%in%aaprot$x,'blue','red')#
# pdf('../data/plots/heatmap_withnongene.pdf',width=16,height=12)
# out <- heatmap.2(data.matrix(P_matrix1),
# # cellnote = mat_data, # same data set for cell labels
# # main = "agnes, manhattan", # heat map title
# notecol = "black", # change font color of cell labels to black
# density.info = "none", # turns off density plot inside color legend
# trace = "none", # turns off trace lines inside the heat map
# # breaks = palette.breaks,
# colRow = cols,
# # scale = "row",
# margins = c(8, 4), # widens margins around plot
# col = hmcol, # use on color palette defined earlier
# # breaks=col_breaks, # enable color transition at specified limits
# srtCol = 45,
# dendrogram = "row", # only draw a row dendrogram
# Rowv = dend1_, # sort(rotate.dendrogram1(dend1_,groups)),#,type = "average"),
# RowSideColors = matrix(gene_col),
# Colv = FALSE,
# cexRow = 0.1,
# # rowsep = c(531),
# # colsep = c(3,6,9,12,15,19,23,27,30,33),
# sepcolor = "red",
# sepwidth = c(0.05, 1)
# )
library(circlize)
library(ComplexHeatmap)
# heatmapfornetwork <- heatmapfornetwork %>% dplyr::mutate(annotation='proto-gene')
#newnovel4 <- read_csv("~/Downloads/omersyntenyfilter.csv")
annotationMat <- heatmapfornetwork %>%
filter(orf_name %in% pgs$orf_name) %>%
mutate(annotation = "Proto-gene") # ifelse(orf_name%in%newnovel1$orf_name,'Proto-gene','Filtered'))
hmcol <- colorRampPalette(brewer.pal(9, "RdPu"))(12)
nongenesDf <- nongenesDf %>% mutate(annotation = "Intergenic ORF")
genesDf <- genesDf %>% mutate(annotation = "Gene")
annotationMat <- bind_rows(nongenesDf, annotationMat, genesDf) %>% mutate(go=ifelse(orf_name%in%go$Protogene,'go','nogo'),
lethal = ifelse(orf_name%in%net_df_significant_sl$`Query Strain ID`|orf_name%in%net_df_significant_sl$`Array Strain ID`,'lethal','not lethal'))
sa <- SingleAnnotation("Degree", P_matrix$annotation)
rwcol <- colorRamp2(c(0, 10, 20, 31), c("#F7FBFF", "#3888C1", "#105BA4", "#08306B"), space = "RGB") # colorRampPalette(brewer.pal(9, "Blues"),bias=3)(31)#
# names(rwsidecol) <- seq(0,34,1)
ha <- rowAnnotation(df = data.frame(Type = annotationMat$annotation,Go=annotationMat$go,Lethal=annotationMat$lethal),
col = list(Type = c(
"Gene" = "#f8f8ff", "Proto-gene" = "lightblue",
"Intergenic ORF" = "blue"
),Go = c(
"go" = "darkgreen", "nogo" = "white"#,
#"Non-gene" = "blue"
),Lethal = c(
"lethal" = "red",# "Proto-gene" = "lightblue",
"not lethal" = "white"
)))
tt <- (as.matrix(P_matrix1))
class(tt) <- "numeric"
#cn <- colnames(P_matrix1)
cn <- rep(c('S.paradoxus', 'S. mikatae','S. kudriavzevii', 'S. bayanus'),6)
topannot <- c('Is there \noverlapping\n ORF',
'Do \nstart codons\n align?',
'Do \nstop codons\n align?',
'Amino acid\n overlap \nIdentity',
'ORF length\n Ratio',
'DNA identity')
hp <- Heatmap((tt),
col = hmcol,
name = "",
# Colv=FALSE,
# dendrogram ='row',
cluster_columns = F,
row_dend_reorder = FALSE,
cluster_rows = as.dendrogram(dend1_),
show_row_names = T,
show_column_names = FALSE,
# = ha,
#column_title_gp = gpar(fontsize = 1),
row_names_gp = gpar(fontsize = 0),
column_names_gp = gpar(fontsize = 8, srt = 45),
row_dend_width = unit(1, "in"),
bottom_annotation = HeatmapAnnotation(
text = anno_text(cn, rot = 45,just = "right",gp = gpar(fontface = 'italic',fontsize=12)),
annotation_height = max_text_width(cn)
),
#top_annotation = HeatmapAnnotation(foo=anno_block(gp=gpar(fill=2:7),labels=(topannot)))
# top_annotation = HeatmapAnnotation(foo = anno_block(#gp = gpar(fill = 2:4),
# labels = c("group1", "group2", "group3"),
# labels_gp = gpar(col = "white", fontsize = 10))),
#column_km=3
column_split = factor(rep(topannot, each = 4),levels=topannot),
column_title_gp = gpar(fontsize=10),
# column_split = factor(rep(letters[1:6], 4), levels = letters[6:1]),
cluster_column_slices = F,
#column_gap = unit(0.5,'in')
# rect_gp = gpar(type = "none"),
# cell_fun = function(j, i, x, y, width, height, fill) {
# grid.rect(x = x, y = y, width = width * 0.6, height = height*0.6),
# },
# heatmap_height = unit(0.05, "in")*nrow(tt)
# column_names_max_height =unit(1, "cm")
# cexRow = 0.4,
# sepwidth = c(0.1,0.1),
# srtCol = 45,
# offsetRow = 10,
# margins =c(6,24), # widens margins around plot
# RowSideColors = (rwsidecol[pgs.df$deg+1])
# lwid=c(2,6), lhei=c(1,2), # keysize=2,# key.par = list(cex=1),lmat=rbind( c(0, 0, 0,4), c(3,1,2,4 ) ), lwid=c(0.2, 0.2, 1,0.5 ),
# density.info = 'none',key.title = '',key.xlab = '',key.size=10,#key.par = par(cex=c(0.1))
# vline=c(0.5,0.3)#,0.3,0.5,0.5,0.5,0.5,0.5)
)
#ggsave('analysis/figures/0812_poster/heatmap.pdf',hp + ha,width=)
pdf('analysis/figures/0812_poster/heatmap.pdf',width = 9,height = 9)
hp + ha
dev.off()
dend1_ <- click_rotate(dend1_)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.