devtools::install_github("philippmuench/OligoMMR")
library(OligoMMR)
data(oligomm_ab_wgs_imputed_wide) # contains AF profile for heatmap
data(oligomm_ab_isolates) # for ation purpose

we disable here the filtering since we are filtering for uniqueness and not based on p-value of effect size

mouse_ids <- c("1683", "1688", "1692", "1699")
col_fun <- circlize::colorRamp2(c(0, 0.5, 1), c("white", "orange", "red"))

We filter now the AF matrix, first by the p values associated to each SNP, then by the effect size and finally using a threshold on how many non-NA observations should be per SNP

df_wide_bug <- as.data.frame(oligomm_ab_wgs_imputed_wide)
dim(df_wide_bug)

control_mouse <- c("1681", "1682", "1683", "1684", "1685")
# subset for control samples

control_subset <- df_wide_bug[, substr(colnames(df_wide_bug), 1, 4) %in% control_mouse]
rowmeans <- rowMeans(control_subset, na.rm = T)
library(matrixStats)
threshold_af <- 0.2
threshold_sd <- 1

rowsds <- rowSds(as.matrix(control_subset), na.rm = T)
zero_in_control <- which(rowmeans <= threshold_af & rowsds <= threshold_sd) # 259 SNPs when threshold is zero
df_wide_bug <- df_wide_bug[zero_in_control,]

We transform the dataframe in a format that can be used for a heatmap

require(matrixStats)
# order by genome and position 
df_wide_bug <- df_wide_bug[order(df_wide_bug$chr, df_wide_bug$POS), ]
# get SNP statistics we can use for filtering later
# split in annot and data
df_wide_bug_mat <- df_wide_bug[,  grep("^16", colnames(df_wide_bug))]
annot <- df_wide_bug[, grep("^16", colnames(df_wide_bug), invert = T)]
# subset for relevant mouse ids
limited_mat <- df_wide_bug_mat[, which(substr(colnames(df_wide_bug_mat), 1, 4) %in% mouse_ids)]
rownames(limited_mat) <- paste0(annot$feature, "-",  annot$POS)

# Again filter for SNPs that are present in these mice
#keep <- which(rowSums(limited_mat > 0, na.rm = T) > 0)
#limited_mat <- limited_mat[keep,]
#annot <- annot[keep,]

We reorder the data frames (columns)

orders <- data.frame(
  day = as.integer(substr(colnames(limited_mat), 6, 7)),
  mouse = substr(colnames(limited_mat), 1, 4)
)
orders <- transform(orders,
                    mouse = factor(mouse, labels = mouse_ids)
)

orders2 <- orders[with(orders, order(mouse, day)), ]
sample_id_in_order <- paste0(orders2$mouse, "-", orders2$day)

Heatmap annotations

require(ComplexHeatmap)
day_annot_1 <- ifelse(binDaysByPhase(
  as.integer(substr(colnames(limited_mat),
                    6, 7))) == "post-treatment", "*", "")
day_annot_1[which(
  translateMouseIdToTreatmentGroup(substr(colnames(limited_mat),
                                          1, 4)) == "Control")] <- "" # Reset arrows for control
day_annot_2 <- as.integer(substr(colnames(limited_mat), 6, 7))

# Annotation columns - should be in the order matched limited_mat
col_fun_ha <- circlize::colorRamp2(c(0, max(day_annot_2)), c("white", "grey50"))
column_ha <- ComplexHeatmap::HeatmapAnnotation(day = anno_simple(day_annot_2,
                                                 col = col_fun_ha,
                                                 pch = day_annot_1,
                                                 height = unit(.4, "cm"),
                                                 border = TRUE),
                               annotation_name_gp = gpar(fontsize = 6))


row_titles_occ <- as.data.frame(table(annot$feature))
row_titles_occ <- row_titles_occ[order(-row_titles_occ$Freq), ]
row_titles_occ <- row_titles_occ[which(row_titles_occ$Var1 != "hypothetical protein" & row_titles_occ$Var1 != "outside ORFs"), ]
row_titles_occ_frequent <- as.character(as.matrix(row_titles_occ[which(row_titles_occ$Freq >= 5), ]$Var1))

row_titles <- unique(annot$feature)
row_titles[which(row_titles == "outside ORFs")] <- ""
row_titles[which(row_titles == "hypothetical protein")] <- ""
row_titles[which(!row_titles %in% row_titles_occ_frequent)] <- ""
annot$p_tetracyclin_star <- gtools::stars.pval(annot$p_tetracyclin)

sig_ha_col_p_ciprofloxacin <- c("sig" = "white", "non-sig" = "white")
sig_ha_col_p_p_tetracyclin <- c("sig" = "white", "non-sig" = "white")
sig_ha_col_p_vancomycin <- c("sig" = "white", "non-sig" = "white")

pch_num <- 20
sig_ha <- ComplexHeatmap::rowAnnotation(Ciprofloxacin = ComplexHeatmap::anno_simple(ifelse(annot$p_ciprofloxacin < 0.05, "sig", "non-sig"),
                                            pch = ifelse(annot$p_ciprofloxacin < 0.05, pch_num, NA),
                                            border = TRUE,
                                            pt_size = unit(1, "mm"),
                                            col = sig_ha_col_p_ciprofloxacin,
                                            simple_anno_size = unit(.2, "cm")),
                        Tetracyclin = anno_simple(ifelse(annot$p_tetracyclin < 0.05, "sig", "non-sig"),
                                          pch = ifelse(annot$p_tetracyclin < 0.05, pch_num, NA),
                                          border = TRUE,
                                          pt_size = unit(1, "mm"),
                                          col = sig_ha_col_p_p_tetracyclin,
                                          simple_anno_size = unit(.2, "cm")),
                        Vancomycin = anno_simple(ifelse(annot$p_vancomycin < 0.05, "sig", "non-sig"),
                                            pch = ifelse(annot$p_vancomycin < 0.05, pch_num, NA),
                                            border = TRUE,
                                            pt_size = unit(1, "mm"),
                                            col = sig_ha_col_p_vancomycin,
                                            simple_anno_size = unit(.2, "cm")),
                        annotation_name_gp = gpar(fontsize = 6))

snp_ids <- paste0(annot$chr,"-" ,annot$POS, "-",annot$ALT, "-", annot$REF)
# iso_data <- iso_data[which(iso_data$AF > 0.8),]
oligomm_ab_isolates$snp_ids <- paste0(oligomm_ab_isolates$chr,"-" ,
                                      oligomm_ab_isolates$POS, "-", 
                                      oligomm_ab_isolates$REF,"-",
                                      oligomm_ab_isolates$ALT)
in_iso <- ifelse(snp_ids %in% oligomm_ab_isolates$snp_ids == TRUE,
                 "in_isolates", "not_in_isolates")
sig_ha_col_in_iso <- c("in_isolates" = "white", "not_in_isolates" = "white")
df <- data.frame(chr = annot$chr, pos = annot$POS, phage = FALSE)
ht <- ComplexHeatmap::Heatmap(data.matrix(limited_mat),
                              name = "AF",
                              left_annotation = sig_ha,
                              na_col = "grey80",
                              cluster_rows = T,
                              row_gap = unit(0, "mm"),
                              row_split = annot$chr,
                              row_title_rot = 0,
                              row_title_gp = gpar(fontsize = 4),
                              top_annotation = column_ha,
                              col = col_fun,
                              column_split = factor(translateMouseIdToTreatmentGroup(substr(colnames(limited_mat), 1, 4)),
                                                    levels = c("Control", "Ciprofloxacin", "Tetracyclin", "Vancomycin")
                              ),
                              column_gap = unit(1, "mm"),
                              gap = unit(0, "mm"),
                              cluster_columns = F,
                              border_gp = gpar(col = "black", lty = 1),
                              show_row_dend = F,
                              show_column_names = F,
                              column_title_gp = gpar(fontsize = 6),
                              column_names_gp = gpar(fontsize = 3),
                              show_row_names = T,
                              row_names_gp = gpar(fontsize = 3))
ht
pdf(file = "Figure_SX1_2.pdf", width = 10, height = 15)
print(ht)
dev.off()

check if we have the 30S in C. incoccuum

library(OligoMMR)
data(oligomm_ab_wgs_imputed_wide) # contains AF profile for heatmap

oligomm_ab_wgs_imputed_wide_30s <- oligomm_ab_wgs_imputed_wide[which(oligomm_ab_wgs_imputed_wide$POS == 1519047),]

# split in annot and data
df_af <- oligomm_ab_wgs_imputed_wide_30s[,  grep("^16", colnames(oligomm_ab_wgs_imputed_wide_30s))]

df_af_melt <- reshape2::melt(df_af)
df_af_melt$mouse <- substr(df_af_melt$variable, 1, 4)
df_af_melt$day <- substr(df_af_melt$variable, 6, 7)
df_af_melt$group <- translateMouseIdToTreatmentGroup(df_af_melt$mouse)

library(ggplot2)
p <- ggplot(data = df_af_melt, aes(x = day, y = value, color = group, group = mouse))
p <- p + geom_point()
p <- p + facet_wrap(~ mouse)
p <- p + theme_pmuench()
p

try with a heatmap

library(OligoMMR)
data("oligomm_ab_wgs_imputed_long")
oligomm_ab_wgs_imputed_long <- oligomm_ab_wgs_imputed_long[which(oligomm_ab_wgs_imputed_long$POS == 1519047),]
'%!in%' <- function(x,y)!('%in%'(x,y))
remove <- c("rows_median", "rows_max", "rows_IQR", "rows_sd", "low_sd")
oligomm_ab_wgs_imputed_long <- oligomm_ab_wgs_imputed_long[ which(oligomm_ab_wgs_imputed_long$variable %!in% remove),]

library(tidyr)
df <- data.frame(day = oligomm_ab_wgs_imputed_long$day, 
                 mouse = oligomm_ab_wgs_imputed_long$mouse_id,
                 af = oligomm_ab_wgs_imputed_long$value)

test <- spread(df, key = day , value = af)
rownames(test) <- test$mouse
test$mouse <- NULL
library(ComplexHeatmap)

# annotation of day (columns)
day_annot_1 <- ifelse(binDaysByPhase(as.integer(colnames(test))) == "post-treatment", "*", "")
day_annot_2 <- as.integer(colnames(test))

# Annotation columns - should be in the order matched limited_mat
col_fun_ha <- circlize::colorRamp2(c(0, max(day_annot_2)), c("white", "grey50"))
column_ha <- ComplexHeatmap::HeatmapAnnotation(day = anno_simple(day_annot_2,
                                                 col = col_fun_ha,
                                                 pch = day_annot_1,
                                                 height = unit(.4, "cm"),
                                                 border = TRUE),
                               annotation_name_gp = gpar(fontsize = 6))

ht <- Heatmap(data.matrix(test),
        cluster_rows = F,

        name = "AF",
        border_gp = gpar(col = "black", lty = 1),
        row_split = factor(translateMouseIdToTreatmentGroup(rownames(test)),
                           levels = c("Control", "Ciprofloxacin", "Tetracyclin", "Vancomycin")),
        column_gap = unit(1, "mm"),
        row_title_rot = 0,
        top_annotation = column_ha,
        gap = unit(0, "mm"),
        row_gap = unit(0, "mm"),
        column_title = "30S ribosomal protein S10 (1519047)",
        row_names_gp = gpar(fontsize = 6),
        column_title_gp = gpar(fontsize = 6),
        row_title_gp = gpar(fontsize = 6),
        column_names_gp = gpar(fontsize = 6),
        na_col = "grey80",
        col = circlize::colorRamp2(c(0, 0.5, 1), c("white", "orange", "red")),
        cluster_columns = F)

pdf("Figure_SX2.pdf", width = 2.8, height = 1.4)
print(ht)
dev.off()


philippmuench/OligoMMR documentation built on April 15, 2022, 10:32 a.m.