R/synalAnalysis.R

library(tidyverse)
library(rtracklayer)
library(gower)
library(dendextend)
library(cluster)
library(StatMatch)
library(gplots)

set.seed(1)
overlappingorfs <- read_csv("analysis/data/raw_data/overlappingorfs.csv")
net.df <- readRDS("analysis/data/derived_data/SGA_data_combined.rds.gz")
strain_ids <- read_csv("analysis/data/derived_data/strain_ids.csv")
net_df_significant <- filter(net.df, `P-value` <= 0.05 & `Query Strain ID` %in% overlappingorfs$orf_name == FALSE & `Array Strain ID` %in% overlappingorfs$orf_name == FALSE)

net_df_significant_sl <- filter(net_df_significant, `Genetic interaction score (ε)` <= -0.35)

go <- read_csv('~/Downloads/GO_hits.csv')
scer <- import(con = "~/Downloads/ucscgenes.bed")
#smorfsAll <- read_csv("ANNE2-28-19-ref_genes.csv") %>% filter(is.na(overlap_refgene) == T)
overlappingNongenes <- read.csv("~/Main/anne/overlappingNongenes.csv")
overlappingNongenes <- overlappingNongenes$x
summ <- read_csv("~/Main/SUMMARY_2012.csv")
summ.gene <- summ$orf_name[summ$category == "gene"]

synaldata <- read_csv("/media/oma21/Files/alldata/alldata/heatmapdata.csv")

pgs <- read_csv('~/Main/pgsNetwork/data/0701_proto-genes_list')

genesDf <- read_csv("~/Main/anne/networkSynal/geneheatmap.csv") %>% mutate(annotation = "gene")
rowsam <- sample(1:1030, 100)
genesDf <- genesDf[rowsam, ]

genesDf[, c(3, 4, 5, 6)] <- lapply(genesDf[, c(3, 4, 5, 6)], as.numeric)

nongenesDf <- read_csv("~/Main/anne/networkSynal/nongeneheatmap.csv") %>%
  mutate(annotation = "nongenes") %>%
  filter(orf_name %in% overlappingNongenes == F)
nongenesDf <- nongenesDf[sample(1:nrow(nongenesDf), 100),]
nongenesDf[, c(3, 4, 5, 6)] <- lapply(nongenesDf[, c(3, 4, 5, 6)], as.numeric)

heatmapfornetwork <- synaldata %>%
  filter(orf_name %in% pgs$orf_name) %>%
  dplyr::mutate(annotation = "proto-gene")
colnames(heatmapfornetwork) <- colnames(nongenesDf)
# heatmapfornetwork <- ifelse(heatmapfornetwork==1,TRUE,FALSE)
P_matrix <- bind_rows(nongenesDf, heatmapfornetwork, genesDf) # heatmapfornetwork[,-c(1,2)]heatmapfornetwork#%>%filter(orf_name%in%diff.name)#
P_matrix1 <- P_matrix[, -c(1, 2, 27)]
gower_dist <- gower.dist(as.data.frame(P_matrix1))
P_matrix2 <- as.data.frame(P_matrix1)
P_matrix2[, c(5, 6, 7, 8, 9, 10, 11, 12)] <- lapply((P_matrix2[, c(5, 6, 7, 8, 9, 10, 11, 12)]), as.factor)
weights <- c(
  0.5, 0.5, 0.5, 0.5,
  0.5, 0.5, 0.5, 0.5,
  0.5, 0.5, 0.5, 0.5,
  2, 4, 6, 8,
  0.5, 0.5, 0.5, 0.5,
  2, 4, 6, 8
)
# weights <- c(0.5,0.5,0.5,0.5,
#              0,0,0,0,
#              0,0,0,0,
#              2,4,6,8,
#              0.5,0.5,0.5,0.5,
#              2,4,6,8)

# weights <- c(0.5,0.5,0.5,0.5,
#              1,1,1,1,
#              1,1,1,1,
#              0,0,0,0,
#              0.5,0.5,0.5,0.5,
#              1,1,1,2)

gw <- daisy(as.data.frame(P_matrix2), metric = "gower", weights = weights)

# pam_fit <- pam(gw, diss = TRUE, k = 2)
# clusplot(pam_fit)

# cluster the distance matrix with agnes. I use 'complete' or 'ward' linkage methods depending on the clusters and goals
agn1 <- agnes(gw, method = "ward")

# create dendrogram
dend1_ <- as.dendrogram(agn1)
#cols_branches <- c("red", "gold", "violet", "darkseagreen", "purple", "darkblue")
#dend1_ <- color_branches(dend1_, k = length(cols_branches), col = cols_branches)


{
  P_matrix1$`start codon in Para`[P_matrix1$`start codon in Para` != "M" ] <- 0 # & P_matrix1$`start codon in Para`!='I'
  P_matrix1$`start codon in Mik`[P_matrix1$`start codon in Mik` != "M"] <- 0

  P_matrix1$`start codon in Para`[P_matrix1$`start codon in Para` == "M"] <- 1
  P_matrix1$`start codon in Mik`[P_matrix1$`start codon in Mik` == "M"] <- 1

  # P_matrix1$`start codon in Para`[P_matrix1$`start codon in Para`=='I']<-0.5
  # P_matrix1$`start codon in Mik`[P_matrix1$`start codon in Mik`=='I']<-0.5

  P_matrix1$`stop codon in Para`[P_matrix1$`stop codon in Para` != "X"] <- 0
  P_matrix1$`stop codon in Mik`[P_matrix1$`stop codon in Mik` != "X"] <- 0

  P_matrix1$`stop codon in Para`[P_matrix1$`stop codon in Para` == "X"] <- 1
  P_matrix1$`stop codon in Mik`[P_matrix1$`stop codon in Mik` == "X"] <- 1

  P_matrix1$`start codon in Bay`[P_matrix1$`start codon in Bay` != "M" ] <- 0 # & P_matrix1$`start codon in Bay`!='I'
  P_matrix1$`start codon in Kud`[P_matrix1$`start codon in Kud` != "M" ] <- 0 # & P_matrix1$`start codon in Kud`!='I'

  P_matrix1$`start codon in Bay`[P_matrix1$`start codon in Bay` == "M"] <- 1
  P_matrix1$`start codon in Kud`[P_matrix1$`start codon in Kud` == "M"] <- 1

  # P_matrix1$`start codon in Bay`[P_matrix1$`start codon in Bay`=='I']<-0.5
  # P_matrix1$`start codon in Kud`[P_matrix1$`start codon in Kud`=='I']<-0.5

  P_matrix1$`stop codon in Bay`[P_matrix1$`stop codon in Bay` != "X"] <- 0
  P_matrix1$`stop codon in Kud`[P_matrix1$`stop codon in Kud` != "X"] <- 0

  P_matrix1$`stop codon in Bay`[P_matrix1$`stop codon in Bay` == "X"] <- 1
  P_matrix1$`stop codon in Kud`[P_matrix1$`stop codon in Kud` == "X"] <- 1
}
library(RColorBrewer)
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(12)

# cols <- rep('black', nrow(P_matrix))
# cols[(P_matrix$orf_name) %in%heatmapfornetwork$orf_name] <- 'red'

# cols[(P_matrix$orf_name) %in%filtered$orf_name] <- 'red'
# to rotate the dendrogram. see ?click_rotate
P_matrix1 <- as.data.frame(P_matrix1)
rownames(P_matrix1) <- P_matrix$orf_name
# dend1_<-click_rotate(dend1_)
gene_col <- ifelse(P_matrix$annotation == "proto-gene", "lightblue", ifelse(P_matrix$annotation == "nongenes", "blue", "white"))
# ifelse(P_matrix$orf_name%in%aaprot$x,'blue','red')#
# pdf('../data/plots/heatmap_withnongene.pdf',width=16,height=12)
# out <- heatmap.2(data.matrix(P_matrix1),
#                  # cellnote = mat_data,  # same data set for cell labels
#                  # main = "agnes, manhattan", # heat map title
#                  notecol = "black", # change font color of cell labels to black
#                  density.info = "none", # turns off density plot inside color legend
#                  trace = "none", # turns off trace lines inside the heat map
#                  # breaks = palette.breaks,
#                  colRow = cols,
#                  # scale = "row",
#                  margins = c(8, 4), # widens margins around plot
#                  col = hmcol, # use on color palette defined earlier
#                  # breaks=col_breaks,    # enable color transition at specified limits
#                  srtCol = 45,
#                  dendrogram = "row", # only draw a row dendrogram
#                  Rowv = dend1_, # sort(rotate.dendrogram1(dend1_,groups)),#,type = "average"),
#                  RowSideColors = matrix(gene_col),
#                  Colv = FALSE,
#                  cexRow = 0.1,
#                  # rowsep = c(531),
#                  # colsep = c(3,6,9,12,15,19,23,27,30,33),
#                  sepcolor = "red",
#                  sepwidth = c(0.05, 1)
# )

library(circlize)
library(ComplexHeatmap)
# heatmapfornetwork <- heatmapfornetwork %>% dplyr::mutate(annotation='proto-gene')
#newnovel4 <- read_csv("~/Downloads/omersyntenyfilter.csv")

annotationMat <- heatmapfornetwork %>%
  filter(orf_name %in% pgs$orf_name) %>%
  mutate(annotation = "Proto-gene") # ifelse(orf_name%in%newnovel1$orf_name,'Proto-gene','Filtered'))

hmcol <- colorRampPalette(brewer.pal(9, "RdPu"))(12)

nongenesDf <- nongenesDf %>% mutate(annotation = "Intergenic ORF")
genesDf <- genesDf %>% mutate(annotation = "Gene")

annotationMat <- bind_rows(nongenesDf, annotationMat, genesDf) %>% mutate(go=ifelse(orf_name%in%go$Protogene,'go','nogo'),
                                                                          lethal = ifelse(orf_name%in%net_df_significant_sl$`Query Strain ID`|orf_name%in%net_df_significant_sl$`Array Strain ID`,'lethal','not lethal'))
sa <- SingleAnnotation("Degree", P_matrix$annotation)
rwcol <- colorRamp2(c(0, 10, 20, 31), c("#F7FBFF", "#3888C1", "#105BA4", "#08306B"), space = "RGB") # colorRampPalette(brewer.pal(9, "Blues"),bias=3)(31)#
# names(rwsidecol) <- seq(0,34,1)
ha <- rowAnnotation(df = data.frame(Type = annotationMat$annotation,Go=annotationMat$go,Lethal=annotationMat$lethal),
                    col = list(Type = c(
  "Gene" = "#f8f8ff", "Proto-gene" = "lightblue",
  "Intergenic ORF" = "blue"

),Go = c(
  "go" = "darkgreen", "nogo" = "white"#,
  #"Non-gene" = "blue"
),Lethal = c(
  "lethal" = "red",# "Proto-gene" = "lightblue",
  "not lethal" = "white"
)))

tt <- (as.matrix(P_matrix1))
class(tt) <- "numeric"
#cn <- colnames(P_matrix1)
cn <- rep(c('S.paradoxus', 'S. mikatae','S. kudriavzevii', 'S. bayanus'),6)
topannot <- c('Is there \noverlapping\n ORF',
              'Do \nstart codons\n align?',
              'Do \nstop codons\n align?',
              'Amino acid\n overlap \nIdentity',
              'ORF length\n Ratio',
              'DNA identity')



hp <- Heatmap((tt),
              col = hmcol,
              name = "",
              # Colv=FALSE,
              # dendrogram ='row',
              cluster_columns = F,
              row_dend_reorder = FALSE,
              cluster_rows = as.dendrogram(dend1_),
              show_row_names = T,
              show_column_names = FALSE,
              # = ha,
              #column_title_gp = gpar(fontsize = 1),
              row_names_gp = gpar(fontsize = 0),
              column_names_gp = gpar(fontsize = 8, srt = 45),
              row_dend_width = unit(1, "in"),
              bottom_annotation = HeatmapAnnotation(
                text = anno_text(cn, rot = 45,just = "right",gp = gpar(fontface = 'italic',fontsize=12)),
                annotation_height = max_text_width(cn)
              ),
              #top_annotation = HeatmapAnnotation(foo=anno_block(gp=gpar(fill=2:7),labels=(topannot)))
              # top_annotation = HeatmapAnnotation(foo = anno_block(#gp = gpar(fill = 2:4),
              #                                                     labels = c("group1", "group2", "group3"),
              #                                                     labels_gp = gpar(col = "white", fontsize = 10))),
              #column_km=3
              column_split = factor(rep(topannot, each = 4),levels=topannot),
              column_title_gp = gpar(fontsize=10),
              # column_split = factor(rep(letters[1:6], 4), levels = letters[6:1]),
              cluster_column_slices = F,
             #column_gap = unit(0.5,'in')
             # rect_gp = gpar(type = "none"),
              # cell_fun = function(j, i, x, y, width, height, fill) {
              #   grid.rect(x = x, y = y, width = width * 0.6, height = height*0.6),
              #   },
             # heatmap_height = unit(0.05, "in")*nrow(tt)

              # column_names_max_height =unit(1, "cm")
              # cexRow = 0.4,
              # sepwidth = c(0.1,0.1),
              # srtCol = 45,
              # offsetRow = 10,
              # margins =c(6,24),     # widens margins around plot
              # RowSideColors = (rwsidecol[pgs.df$deg+1])
              # lwid=c(2,6), lhei=c(1,2), # keysize=2,# key.par = list(cex=1),lmat=rbind( c(0, 0, 0,4), c(3,1,2,4 ) ), lwid=c(0.2, 0.2, 1,0.5 ),
              # density.info = 'none',key.title = '',key.xlab = '',key.size=10,#key.par = par(cex=c(0.1))
              # vline=c(0.5,0.3)#,0.3,0.5,0.5,0.5,0.5,0.5)
)
#ggsave('analysis/figures/0812_poster/heatmap.pdf',hp + ha,width=)
pdf('analysis/figures/0812_poster/heatmap.pdf',width = 9,height = 9)
hp + ha
dev.off()
dend1_ <- click_rotate(dend1_)
oacar/pgsNetwork documentation built on Oct. 1, 2019, 9:15 a.m.