packages <- c(
    "tidyverse",
    "grid",
    "gtable",
    "miR34AasRNAproject",
    "ggthemes",
    "printr",
    "stringr"
)
purrr::walk(packages, library, character.only = TRUE)
rm(packages)



Introduction

Wang et al. have previously published on an antisense transcript arising from the miR34a locus that they named "lnc34a". Wang et al. state that this transcript is expressed at "significantly higher levels in colon cancer stem cells spheres (CCSCs)" compared to 9 commonly used colorectal cancer cell lines. After obtaining the sequence for lnc34a, we could confirm that neither our primer walk analysis nor our 3' RACE detected the lnc34a transcript's presence in the cell lines we had utilized. To further investigate the presence of lnc34a in non-CCSC cell lines, we interrogated CAGE data from RIKEN to examine the possibility of an alternative transcription start site upstream of the transcription start site we had characterized.

Methods

# Number of distinct cell lines in the "Supplementary Figure 8" data set,
# i.e. the count of unique values in its `cell` column.
# NOTE(review): this is the same value as length(cellLinesInFinal()); confirm
# that the "downloaded" and "final" counts are meant to be identical.
nrCellLinesDown <- function() {
  cell_names <- pull(getData("Supplementary Figure 8"), cell)
  length(unique(cell_names))
}

# Unique cell line names found in the `cell` column of the
# "Supplementary Figure 8" data set, in order of first appearance.
cellLinesInFinal <- function() {
  fig_data <- getData("Supplementary Figure 8")
  unique(pull(fig_data, cell))
}

# Tabulate the samples in the "Supplementary Figure 8" data set by RNA
# extraction method. The first occurrence of "total" in each `rna extract`
# value is relabelled "total RNA" before counting.
# Returns a table with one row per extraction method and its count `n`.
RNAprepTable <- function() {
  fig_data <- getData("Supplementary Figure 8")
  relabelled <- mutate(
    fig_data,
    `rna extract` = str_replace(`rna extract`, "total", "total RNA")
  )
  extracts <- select(relabelled, `rna extract`)
  dplyr::count(extracts, `rna extract`)
}

# Tabulate the samples in the "Supplementary Figure 8" data set by the value
# of their `localization` column.
# Returns a table with one row per localization and its count `n`.
localizationTable <- function() {
  localizations <- select(getData("Supplementary Figure 8"), localization)
  dplyr::count(localizations, localization)
}

#add function to show polyA and total RNA table

All available CAGE data from `r nrCellLinesDown()` cell lines were downloaded from UCSC with the script entitled "lnc34a CAGE" in the data-raw/saveRDS.R file. Of these, `r cellLinesInFinal() %>% length()` cell lines had CAGE transcription start sites mapping to the plus strand of chromosome 1 and in regions corresponding to 200 bp upstream of the lnc34a start site (9241796 - 200) and 200 bp upstream of the GENCODE annotated miR34a asRNA start site (9242263 + 200). These cell lines included: `r cellLinesInFinal()`. All CAGE reads were plotted, and the RPKM of each individual read was used to color it to indicate its relative abundance. In addition, a density plot shows the distribution of the CAGE reads in the specified interval. Further information concerning the RNA extraction and localization of the included samples is shown below:

Number of samples from included RNA extraction methods:

# Print the number of samples per RNA extraction method.
RNAprepTable()

Number of samples from included RNA localizations:

# Print the number of samples per RNA localization.
localizationTable()



Results

# Genes and primers ----

# Primer-walk primers F10-F15; they are drawn on their own "Primers" row and
# each primer gets its own fill colour via `fill`.
pw <- tibble(
  fill = paste0("F", 10:15),
  gene = "Primers",
  feature = rep("primer", 6),
  start = c(9242282, 9242170, 9242037, 9241944, 9241838, 9241734),
  stop = c(9242303, 9242192, 9242058, 9241967, 9241861, 9241757)
)

# Exon/intron coordinates for the two antisense gene models. The table is then
# cut at position 9243000 so that the CAGE reads are more visible in the
# figure: features starting at or beyond the cut are dropped and every
# remaining intron is made to end at the cut. Finally the primers are appended
# and `gene` becomes a factor whose level order fixes the row order of the
# gene track.
gdata <- tibble(
  gene = rep(c("miR34a asRNA", "lnc34a"), times = c(5, 3)),
  fill = gene,
  feature = c(
    "exon", "intron", "exon", "intron", "exon",
    "exon", "intron", "exon"
  ),
  start = c(
    9242262, 9242375, 9243691, 9243866, 9251451,
    9241796, 9242354, 9256971
  ),
  stop = c(
    9242375, 9243691, 9243866, 9251451, 9252148,
    9242354, 9256971, 9257102
  )
) %>%
  filter(start < 9243000) %>%
  mutate(stop = if_else(feature == "intron", 9243000, stop)) %>%
  bind_rows(pw) %>%
  mutate(
    gene = readr::parse_factor(
      gene,
      levels = c("miR34a asRNA", "lnc34a", "Primers")
    )
  )

# CAGE data ----
# Collapse the "Supplementary Figure 8" data to one row per (start, stop)
# interval by summing `level`, then rank the intervals from highest to lowest
# summed level. `y` is that rank and is used as the vertical position of each
# read in the CAGE track.
cage <- getData("Supplementary Figure 8") %>%
  select(start, stop, level) %>%
  group_by(start, stop) %>%
  summarize(level = sum(level)) %>%
  ungroup() %>%
  arrange(desc(level)) %>%
  # seq_len() rather than 1:n(): 1:0 would yield c(1, 0) and make mutate()
  # fail if no intervals are present.
  mutate(y = seq_len(n()))
###plot rmarkdown

#genes
# Gene-model track for the HTML output. Exons and primers are drawn as flat
# boxes (arrowheads are given zero size), introns as dotted grey segments, and
# primer names as rotated labels at y = 2.5, i.e. between the second and third
# rows of the discrete `gene` axis.
p1 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    data = filter(gdata, feature %in% c("exon", "primer")),
    aes(xmin = start, xmax = stop, y = gene, fill = fill, group = gene),
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    colour = "gray"
  ) +
  geom_segment(
    data = filter(gdata, feature == "intron"),
    aes(x = start, xend = stop, y = gene, yend = gene, group = gene),
    linetype = "dotted",
    colour = "gray",
    size = 1
  ) +
  geom_text(
    data = filter(gdata, feature == "primer"),
    aes(x = start + 10, y = 2.5, label = fill),
    size = 3,
    angle = 90,
    colour = "grey20"
  ) +
  ggthemes::theme_few() +
  ggthemes::scale_fill_ptol() +
  # Same x-range as the CAGE and density panels so the stacked panels align.
  xlim(9241596, 9243000) +
  labs(x = "Chromosome 1", y = "Genes") +
  # "none" hides the fill legend; the logical form `fill = FALSE` is
  # deprecated in ggplot2.
  guides(fill = "none")

#cage
# CAGE track for the HTML output: one thin flat bar per CAGE interval, placed
# at its rank `y` and filled by `level` on a viridis scale; the colourbar
# (titled "RPKM") sits above the panel.
p2 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    mapping = aes(xmin = start, xmax = stop, y = y, fill = level),
    data = cage,
    colour = "gray",
    show.legend = TRUE,
    arrow_body_height = unit(1, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrowhead_width = unit(0, "mm")
  ) +
  viridis::scale_fill_viridis() +
  guides(
    fill = guide_colourbar(
      title = "RPKM",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  xlim(9241596, 9243000) +
  labs(y = "Cage reads") +
  ggthemes::theme_few() +
  theme(
    legend.position = "top",
    axis.title.x = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

#density
# Density track for the HTML output: kernel density of the CAGE interval
# start positions, drawn as a line over the same x-range as the other panels.
p3 <- ggplot(data = NULL) +
  geom_line(mapping = aes(x = start), data = cage, stat = "density") +
  xlim(9241596, 9243000) +
  labs(y = "Density") +
  ggthemes::theme_few() +
  theme(
    axis.title.x = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Convert the three panels to grobs and stack them top-to-bottom as
# CAGE reads (p2), density (p3), genes (p1), then draw the stack on a fresh
# page with relative panel heights 3:1:1.
grobz <- list(p1, p2, p3) %>%
  lapply(ggplotGrob)
g <- rbind(grobz[[2]], grobz[[3]], grobz[[1]], size = "last")
grid.newpage()
g %>%
  resize_heights(unit(c(3, 1, 1), "null")) %>%
  grid.draw()

#plot pdf

#genes
# Gene-model track for the PDF output. Same layers as the HTML version, with
# thinner boxes and lines and smaller text sized for a small print figure.
p1 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    data = filter(gdata, feature %in% c("exon", "primer")),
    aes(xmin = start, xmax = stop, y = gene, fill = fill),
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrow_body_height = unit(1, "mm"),
    colour = "gray",
    lwd = 0.1
  ) +
  geom_segment(
    data = filter(gdata, feature == "intron"),
    aes(x = start, xend = stop, y = gene, yend = gene),
    linetype = "dotted",
    colour = "gray",
    size = 0.55
  ) +
  geom_text(
    data = filter(gdata, feature == "primer"),
    aes(x = start + 10, y = 2.55, label = fill),
    size = 1.75,
    angle = 90,
    colour = "grey20"
  ) +
  ggthemes::theme_few() +
  ggthemes::scale_fill_ptol() +
  # Same x-range as the CAGE and density panels so the stacked panels align.
  xlim(9241596, 9243000) +
  labs(x = "Chromosome 1", y = "Genes") +
  # "none" hides the fill legend; the logical form `fill = FALSE` is
  # deprecated in ggplot2.
  guides(fill = "none") +
  theme(
    axis.text = element_text(size = 6),
    axis.title.x = element_text(size = 6),
    # The previous call was margin(r = 8, "cm"): the unnamed "cm" was matched
    # to `t`, not `unit`, so the right margin was taken in the default unit
    # (pt). The unit is now named explicitly, keeping that effective margin
    # without passing a string as the top margin.
    axis.title.y = element_text(size = 6, margin = margin(r = 8, unit = "pt")),
    axis.ticks.length = unit(0.5, "mm"),
    axis.ticks = element_line(size = 0.15),
    panel.border = element_rect(fill = NA, size = 0.1),
    plot.margin = unit(c(0.01, 1, 0.01, 1), "mm")
  )

#cage
# CAGE track for the PDF output: borderless thin bars per CAGE interval,
# filled by `level` on a viridis scale, with a compact "RPKM" colourbar on top
# and text sizes reduced for print.
p2 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    mapping = aes(xmin = start, xmax = stop, y = y, fill = level),
    data = cage,
    colour = NA,
    lwd = 0.1,
    show.legend = TRUE,
    arrow_body_height = unit(1, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrowhead_width = unit(0, "mm")
  ) +
  viridis::scale_fill_viridis() +
  guides(
    fill = guide_colourbar(
      title = "RPKM",
      title.position = "top",
      title.hjust = 0.5,
      barheight = unit(2, "mm")
    )
  ) +
  xlim(9241596, 9243000) +
  labs(y = "Cage reads") +
  ggthemes::theme_few() +
  theme(
    # legend
    legend.position = "top",
    legend.title = element_text(size = 5),
    legend.text = element_text(size = 4),
    legend.box.spacing = unit(3, "pt"),
    # axes
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 6),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    # panel
    panel.border = element_rect(fill = NA, size = 0.1),
    plot.margin = unit(c(0.01, 1, 0.01, 1), "mm"),
    strip.text.y = element_text(angle = 0)
  )

#density
# Density track for the PDF output: kernel density of the CAGE interval start
# positions as a thin line, with only a small y-axis title kept.
p3 <- ggplot(data = NULL) +
  geom_line(
    mapping = aes(x = start),
    data = cage,
    stat = "density",
    lwd = 0.1
  ) +
  xlim(9241596, 9243000) +
  labs(y = "Density") +
  ggthemes::theme_few() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 6),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_rect(fill = NA, size = 0.1)
    #plot.margin = unit(c(0, 1, 2, 1), "mm")
  )

# Convert the PDF panels to grobs, stack them top-to-bottom as CAGE reads
# (p2), density (p3), genes (p1), and set relative panel heights to 5:1:2.
# The result is kept in `g`; drawing is left disabled.
grobz <- list(p1, p2, p3) %>%
  lapply(ggplotGrob)
g <- rbind(grobz[[2]], grobz[[3]], grobz[[1]], size = "last") %>%
  resize_heights(unit(c(5, 1, 2), "null"))
#grid.draw(g)

# path <- file.path("~/GitHub/miR34a_asRNA_project/inst", fileMap(type = "pdf")["Supplementary Figure 8"][[1]])
# ggsave(
#   plot = g,
#   filename = path,
#   device = cairo_pdf,
#   height = 100,
#   width = 90,
#   units = "mm"
# )
CAGE tags (top), CAGE tag density (middle), and antisense genes at the miR34a locus and primers from the primer walk assay (bottom). CAGE tags are colored according to their corresponding RPKM values.



Conclusions

The results show a high density of CAGE tags aligning to the region corresponding to the annotated miR34a asRNA start site. Several additional peaks, albeit with a much lower average expression, align slightly upstream of the annotated miR34a asRNA start site, one of which corresponds to the upstream start site detected in our primer walk analysis (Figure 1e). Despite this, we find no CAGE tags aligning at the transcription start site or upstream of the transcription start site of the lnc34a transcript. This potentially indicates that lnc34a is tightly regulated and specifically expressed in the CCSC context, as claimed by the authors. An alternative interpretation could be that lnc34a expression is present in a subset of the examined cell lines, although at levels too low to be detected. Finally, lnc34a may not be 5'-capped, precluding its detection by CAGE.





# Record the R version and loaded package versions used to build this document.
sessionInfo()


GranderLab/miR34a_asRNA_project documentation built on May 26, 2019, 7:26 a.m.