knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

library(miplicorn)
library(ggplot2)

Parse

While {MIPTools} provides several .csv files that can be analyzed, parsing such files is difficult because of their non-rectangular structure. As such, attempting to read these files with default parameters fails.

cov_file <- miplicorn::miplicorn_example("coverage_AA_table.csv")
vroom::vroom(cov_file, col_names = FALSE, n_max = 10, col_select = 1:5)

As visible, there are six rows of metadata, which specify the gene ID, gene, mutation name, etc. The remaining rows contain the data we are actually interested in: the rows contain the samples and the columns contain the positions we are interested in. The difficult part of reading in these files is that the metadata must be extracted and treated differently from the data itself.

{miplicorn}, therefore, provides a family of functions: read_tbl_*() which quickly read in such non-rectangular files. The functions generate a tibble where each row represents a sample and a position. Thus, there will be multiple entries for each unique sample.

cov_file <- miplicorn::miplicorn_example("coverage_AA_table.csv")

data <- miplicorn::read_tbl_coverage(cov_file)
data

Manipulate

Amino acids

In some the user may want to convert amino acid abbreviations from the three to one letters abbreviation, or vice versa for easier interpretation of data.

data %>%
  dplyr::mutate(aa_change = convert_three(aa_change))

Sort

In plotting data, it is useful to be able to control the order in which data appears. While dplyr::arrange() provides the functionality to sort numeric or character data, it lacks the ability to naturally sort alphanumeric vectors, vectors containing both letters and numerics. Furthermore, the ordering of data is not kept when fed into plotting functions. arrange_natural() attempts to address these limitations.

arrange_natural(data, sample, gene)

Visualize

There are an almost limitless different ways to visualize a single set of data. While {miplicorn} cannot address every method, it aims to simplify the creation of key figures.

Chromosome map

probes <- tibble::tribble(
  ~chrom, ~start, ~end, ~probe_set,
  "chr13", 816953, 817119, "Probe Set 1",
  "chr12", 803357, 803591, "Probe Set 2",
  "chr8", 502688, 502935, "Probe Set 2",
  "chr10", 108618, 108784, "Probe Set 1",
  "chr6", 1140002, 1140248, "Probe Set 2",
  "chr12", 720907, 721160, "Probe Set 2",
  "chr7", 376292, 376525, "Probe Set 2",
  "chr10", 557614, 557778, "Probe Set 1",
  "chr4", 820945, 821202, "Probe Set 2",
  "chr7", 370729, 370997, "Probe Set 2",
  "chr13", 1595910, 1596051, "Probe Set 1",
  "chr12", 804302, 804503, "Probe Set 2",
  "chr7", 387438, 387683, "Probe Set 2",
  "chr2", 760189, 760450, "Probe Set 3",
  "chr10", 1339875, 1340019, "Probe Set 1",
  "chr5", 969465, 969717, "Probe Set 2",
  "chr5", 950763, 950992, "Probe Set 2",
  "chr8", 1001846, 1002015, "Probe Set 1",
  "chr14", 265959, 266224, "Probe Set 2",
  "chr12", 391646, 391915, "Probe Set 3",
  "chr3", 943959, 944100, "Probe Set 1",
  "chr10", 1399202, 1399341, "Probe Set 1",
  "chr6", 984389, 984549, "Probe Set 1",
  "chr6", 1171238, 1171462, "Probe Set 2",
  "chr12", 907196, 907358, "Probe Set 1",
  "chr12", 2045736, 2045974, "Probe Set 2",
  "chr4", 797418, 797668, "Probe Set 2",
  "chr8", 412943, 413111, "Probe Set 1",
  "chr13", 1744851, 1745080, "Probe Set 2",
  "chr13", 1715897, 1716163, "Probe Set 2",
  "chr14", 2335525, 2335776, "Probe Set 2",
  "chr9", 619453, 619695, "Probe Set 2",
  "chr7", 691698, 691838, "Probe Set 1",
  "chr1", 487022, 487168, "Probe Set 1",
  "chr4", 748092, 748333, "Probe Set 3",
  "chr12", 839856, 840023, "Probe Set 1",
  "chr5", 908572, 908812, "Probe Set 2",
  "chr6", 574794, 575021, "Probe Set 3",
  "chr11", 645286, 645537, "Probe Set 2",
  "chr14", 1397696, 1397835, "Probe Set 1",
  "chr8", 554388, 554624, "Probe Set 2",
  "chr6", 573950, 574173, "Probe Set 3",
  "chr8", 502885, 503028, "Probe Set 1",
  "chr13", 1683151, 1683393, "Probe Set 2",
  "chr1", 480381, 480522, "Probe Set 1",
  "chr2", 614884, 615029, "Probe Set 1",
  "chr11", 1892305, 1892565, "Probe Set 3",
  "chr7", 325895, 326131, "Probe Set 2",
  "chr12", 693472, 693703, "Probe Set 2",
  "chr9", 1315401, 1315566, "Probe Set 1",
  "chr1", 468561, 468805, "Probe Set 3",
  "chr11", 577202, 577433, "Probe Set 2",
  "chr7", 425568, 425804, "Probe Set 2",
  "chr13", 1743186, 1743426, "Probe Set 2",
  "chr14", 279174, 279437, "Probe Set 2",
  "chr5", 970251, 970518, "Probe Set 2",
  "chr14", 2408554, 2408822, "Probe Set 2",
  "chr6", 566775, 566989, "Probe Set 3",
  "chr5", 402521, 402681, "Probe Set 1",
  "chr4", 683695, 683948, "Probe Set 2",
  "chr12", 641753, 641993, "Probe Set 2",
  "chr5", 889609, 889761, "Probe Set 1",
  "chr14", 1305357, 1305524, "Probe Set 1",
  "chr12", 719633, 719863, "Probe Set 2",
  "chr14", 294404, 294664, "Probe Set 3",
  "chr13", 1770922, 1771169, "Probe Set 2",
  "chr9", 491664, 491926, "Probe Set 2",
  "chr14", 189457, 189696, "Probe Set 2",
  "chr14", 270470, 270694, "Probe Set 2",
  "chr14", 2404644, 2404913, "Probe Set 2",
  "chr11", 501869, 502100, "Probe Set 2",
  "chr12", 847521, 847688, "Probe Set 1",
  "chr12", 742703, 742966, "Probe Set 2",
  "chr14", 342292, 342493, "Probe Set 2",
  "chr4", 821631, 821882, "Probe Set 2",
  "chr8", 173146, 173300, "Probe Set 1",
  "chr13", 931506, 931653, "Probe Set 1",
  "chr14", 1105617, 1105759, "Probe Set 1",
  "chr6", 937647, 937807, "Probe Set 1",
  "chr13", 748362, 748622, "Probe Set 3",
  "chr2", 799888, 800057, "Probe Set 1",
  "chr1", 192979, 193255, "Probe Set 3",
  "chr6", 1192567, 1192796, "Probe Set 2",
  "chr14", 298125, 298385, "Probe Set 3",
  "chr14", 1792936, 1793099, "Probe Set 1",
  "chr9", 515827, 516089, "Probe Set 2",
  "chr13", 1879289, 1879445, "Probe Set 1",
  "chr8", 579882, 580151, "Probe Set 2",
  "chr12", 2136484, 2136730, "Probe Set 2",
  "chr9", 747439, 747607, "Probe Set 1",
  "chr14", 384281, 384437, "Probe Set 1",
  "chr12", 530735, 530976, "Probe Set 3",
  "chr3", 828179, 828398, "Probe Set 3",
  "chr5", 575507, 575646, "Probe Set 1",
  "chr4", 1089761, 1089919, "Probe Set 1",
  "chr3", 124285, 124437, "Probe Set 1",
  "chr7", 450476, 450639, "Probe Set 1",
  "chr9", 543910, 544166, "Probe Set 2",
  "chr7", 381867, 382106, "Probe Set 2",
  "chr9", 573748, 573975, "Probe Set 2",
  "chr8", 703420, 703582, "Probe Set 1",
  "chr9", 459000, 459227, "Probe Set 2",
  "chr6", 571035, 571262, "Probe Set 3",
  "chr12", 1015227, 1015396, "Probe Set 1",
  "chr14", 316894, 317154, "Probe Set 2",
  "chr14", 2398154, 2398422, "Probe Set 2",
  "chr14", 315276, 315545, "Probe Set 2",
  "chr1", 190191, 190454, "Probe Set 3",
  "chr7", 434946, 435215, "Probe Set 2",
  "chr8", 477561, 477791, "Probe Set 2",
  "chr12", 1075942, 1076100, "Probe Set 1",
  "chr6", 1272918, 1273160, "Probe Set 2",
  "chr11", 604677, 604944, "Probe Set 2",
  "chr4", 749427, 749666, "Probe Set 3",
  "chr7", 411214, 411383, "Probe Set 1",
  "chr2", 814570, 814726, "Probe Set 1",
  "chr2", 762358, 762620, "Probe Set 3",
  "chr12", 2186353, 2186619, "Probe Set 2",
  "chr8", 416736, 416892, "Probe Set 1",
  "chr14", 2341513, 2341746, "Probe Set 2",
  "chr7", 1234411, 1234567, "Probe Set 1",
  "chr6", 1143688, 1143924, "Probe Set 2",
  "chr5", 982925, 983167, "Probe Set 2",
  "chr8", 526863, 527031, "Probe Set 1",
  "chr11", 610381, 610619, "Probe Set 2",
  "chr14", 1661055, 1661194, "Probe Set 1",
  "chr10", 92871, 93021, "Probe Set 1",
  "chr6", 577510, 577776, "Probe Set 3",
  "chr13", 1820748, 1821011, "Probe Set 2",
  "chr3", 812572, 812732, "Probe Set 1",
  "chr9", 541363, 541632, "Probe Set 2",
  "chr5", 952880, 953065, "Probe Set 2",
  "chr6", 1134116, 1134358, "Probe Set 2",
  "chr9", 595805, 596038, "Probe Set 2",
  "chr4", 828775, 829033, "Probe Set 2",
  "chr5", 765737, 765904, "Probe Set 1",
  "chr12", 711830, 712097, "Probe Set 2",
  "chr14", 422252, 422404, "Probe Set 1",
  "chr1", 191736, 191965, "Probe Set 3",
  "chr5", 803549, 803693, "Probe Set 1",
  "chr3", 756807, 756949, "Probe Set 1",
  "chr3", 749520, 749665, "Probe Set 1",
  "chr6", 572875, 573100, "Probe Set 3",
  "chr11", 677085, 677306, "Probe Set 2",
  "chr6", 1252562, 1252823, "Probe Set 2",
  "chr1", 159428, 159596, "Probe Set 1",
  "chr11", 578071, 578306, "Probe Set 2",
  "chr7", 612835, 612994, "Probe Set 1",
  "chr3", 826017, 826239, "Probe Set 3",
  "chr6", 577575, 577853, "Probe Set 3"
)

There are two built in ways to create chromosome maps, each with its own set of strengths and weaknesses. You can either make an interactive map or a more detailed karyoplot.

colours <- c("#006A8EFF", "#A8A6A7FF", "#B1283AFF")
map <- plot_chromoMap(genome_Pf3D7, probes, colours = colours)

# Used to embed into html
widgetframe::frameableWidget(map)


plot_karyoploteR(genome_Pf3D7, probes, colours = colours)

Average coverage

N.B. the remaining figures have not yet been incorporated into {miplicorn}, but as time goes on, more and more visualization methods will be added.

# For the purposes of visualization, we select a small subset of genes
relevant_mutations <- c(
  "dhps", "k13", "atp6", "crt", "cytb",
  "mdr1", "mdr2", "dhfr-ts"
)

# Determine average coverage
data <- data %>%
  dplyr::filter(gene %in% relevant_mutations) %>%
  dplyr::group_by(gene, mutation_name, aa_change) %>%
  dplyr::summarise(coverage = mean(as.numeric(coverage)), .groups = "keep") %>%
  arrange_natural(gene, mutation_name, aa_change)

# Plot
ggplot(data, aes(x = aa_change, y = coverage, fill = gene)) +
  geom_col() +
  labs(
    x = "Amino Acid Change",
    y = "Average Coverage",
    title = "Average Coverage by Mutation Site"
  ) +
  scale_fill_brewer(name = "Gene", palette = "PuBuGn") +
  theme_miplicorn() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

"Validated" mutations

# Create a fake list of validated mutations
set.seed(100)
valid_mutations <- tibble::tibble(
  mutation = unique(data$mutation_name),
  validated = sample(
    c("validated", "suspect"),
    replace = TRUE,
    size = length(unique(data$mutation_name))
  )
)

# Combine the fake list of validated mutations with the previous tibble
validated <- data %>%
  dplyr::left_join(valid_mutations, by = c("mutation_name" = "mutation")) %>%
  dplyr::relocate(validated, .after = mutation_name)
validated %>%
  dplyr::group_by(mutation_name, aa_change, gene, validated) %>%
  dplyr::summarise(coverage = mean(coverage), .groups = "keep") %>%
  ggplot(aes(x = aa_change, y = coverage, fill = gene, alpha = validated)) +
  geom_col() +
  labs(
    y = "Average Coverage",
    x = "Amino Acid Change",
    title = "Average Coverage by Mutation Site"
  ) +
  scale_fill_brewer(name = "Gene", palette = "PuBuGn") +
  theme_miplicorn() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

validated %>%
  dplyr::group_by(mutation_name, aa_change, gene, validated) %>%
  dplyr::summarise(coverage = mean(coverage), .groups = "keep") %>%
  ggplot(aes(x = aa_change, y = coverage, fill = gene)) +
  geom_col() +
  facet_grid(validated ~ .) +
  labs(
    y = "Average Coverage",
    x = "Amino Acid Change",
    title = "Average Coverage by Mutation Site"
  ) +
  scale_fill_brewer(name = "Gene", palette = "PuBuGn") +
  theme_miplicorn() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))


bailey-lab/miplicorn documentation built on March 19, 2023, 7:40 p.m.