packages <- c(
  "tidyverse",
  "grid",
  "gtable",
  "miR34AasRNAproject",
  "printr"
)
purrr::walk(packages, library, character.only = TRUE)
rm(packages)



Introduction

Wang et al. have previously published findings concerning an antisense transcript arising from the miR34a locus that they named "lnc34a". Wang et al. report that this transcript is expressed at "significantly higher levels in colon cancer stem cells spheres (CCSCs)" compared to 9 commonly used colorectal cancer cell lines. After obtaining the sequence for lnc34a, we could confirm that neither our primer walk analysis nor our 3' RACE detected the lnc34a transcript's presence in the cell lines we had utilized. To further investigate the presence of lnc34a in non-CCSC cell lines, we interrogated RNAseq splice junction data generated in conjunction with the ENCODE project to search for introns present at the miR34a locus.



Methods

# Count the distinct cell lines listed in the project's "Supplementary Figure 7"
# text file.
#
# Takes no arguments. The file is read from the project's GitHub repository
# through a gzip-decompressing connection and parsed as tab-separated text.
# Its 16th column (presumably the UCSC download URL of each ENCODE file --
# verify against the file) is reduced to the cell-line token and the number of
# unique tokens is returned.
nrCellLinesDown <- function() {
  repoRoot <- "https://github.com/GranderLab/miR34a_asRNA_project/raw/master/"
  indexPath <- fileMap(type = "txt")["Supplementary Figure 7"][[1]]

  fileIndex <- read_tsv(gzcon(url(file.path(repoRoot, indexPath))))

  # Keep only the text captured between the "wgEncodeCshlLongRnaSeq" file-name
  # prefix and the first following "Cell".
  cellLines <- gsub(
    "^http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeCshlLongRnaSeq/wgEncodeCshlLongRnaSeq(.*)Cell.*",
    "\\1",
    pull(fileIndex, 16)
  )

  length(unique(cellLines))
}

# List the cell lines represented in the "Supplementary Figure 7" data set.
#
# Takes no arguments. The cell-line token is extracted from each entry of the
# `filename` column; the unique tokens are returned as a sorted character
# vector.
cellLinesInFinal <- function() {
  junctionFiles <- pull(getData("Supplementary Figure 7"), filename)

  # Capture the text between the "wgEncodeCshlLongRnaSeq" prefix and "Cell".
  cellLines <- gsub(
    "^wgEncodeCshlLongRnaSeq(.*)Cell.*",
    "\\1",
    junctionFiles
  )

  sort(unique(cellLines))
}

# Tabulate how many of the files in the "Supplementary Figure 7" data set were
# produced with each RNA extraction method.
#
# Takes no arguments. Returns a table with one row per value of `rna extract`
# and its count in column `n`.
RNAprepTable <- function() {
  # Unique file names in the final data set, truncated just before the
  # ".bedRnaElements" suffix so they can be matched against the file index.
  finalFileNames <- getData("Supplementary Figure 7") %>%
    distinct(filename) %>%
    pull(filename)
  finalSampleIds <- str_replace(finalFileNames, "^(.*)\\.bedRnaElements.*", "\\1")

  repoRoot <- "https://github.com/GranderLab/miR34a_asRNA_project/raw/master/"
  indexPath <- fileMap(type = "txt")["Supplementary Figure 7"][[1]]
  fileIndex <- read_tsv(gzcon(url(file.path(repoRoot, indexPath))))

  # X__15 and X__3 look like auto-generated names for unnamed columns;
  # presumably X__15 holds the sample id and X__3 the extraction method --
  # TODO confirm against the index file.
  fileIndex %>%
    filter(X__15 %in% finalSampleIds) %>%
    select(`rna extract` = X__3) %>%
    count(`rna extract`)
}

All available whole cell RNAseq splice junction data from the ENCODE project originating from the Cold Spring Harbor Lab, covering `r nrCellLinesDown()` cell lines, were downloaded from the UCSC genome browser with the script entitled "lnc34a splice junctions" in the data-raw/saveRDS.R file. Of these cell lines, `r cellLinesInFinal() %>% length()` had spliced reads mapping to the plus strand of chromosome 1 in the region between the lnc34a start (9241796) and transcription termination (9257102) sites (note that miR34a asRNA resides entirely within this region). Splice junctions from the following cell lines were included in the final figure: `r cellLinesInFinal()`. All splice junctions were plotted and colored according to the number of reads corresponding to each. Information regarding the RNA extraction is included below:

Number of samples from included RNA extraction methods:

# Print the count of included files per `rna extract` value.
RNAprepTable()



Results

#locus
# Exon/intron coordinates of the two antisense gene models, one row per
# feature. `gene` is converted to a factor with "miR34a asRNA" as the first
# level and "lnc34a" as the second.
data <- tribble(
  ~gene,          ~feature, ~start,  ~stop,
  "miR34a asRNA", "exon",   9242262, 9242375,
  "miR34a asRNA", "intron", 9242375, 9243691,
  "miR34a asRNA", "exon",   9243691, 9243866,
  "miR34a asRNA", "intron", 9243866, 9251451,
  "miR34a asRNA", "exon",   9251451, 9252148,
  "lnc34a",       "exon",   9241796, 9242354,
  "lnc34a",       "intron", 9242354, 9256971,
  "lnc34a",       "exon",   9256971, 9257102
) %>%
  mutate(
    gene = readr::parse_factor(gene, levels = c("miR34a asRNA", "lnc34a"))
  )

#splice junctions
# One row per distinct splice junction (identical start and stop), with `n` the
# sum of `score2` over all rows sharing that junction. Junctions with a summed
# score of 2 or less are dropped; the rest are ranked by decreasing `n`, and
# `y` stores that rank for use as the vertical plotting position.
splJnc <- getData("Supplementary Figure 7") %>%
  select(start, stop, score2) %>%
  group_by(start, stop) %>%
  summarize(n = sum(score2)) %>%
  ungroup() %>%
  filter(n > 2) %>%
  arrange(desc(n)) %>%
  # seq_along() gives an empty index when no junction passes the filter,
  # whereas 1:length(x) would produce c(1, 0) and make mutate() fail.
  mutate(y = seq_along(.data$n))
#plot rmarkdown
###locus
# Gene models at the locus: exons are drawn as arrow bodies without arrowheads
# (arrowhead width/height set to 0) and introns as dotted gray segments, one
# row per gene. The x range is fixed so this panel can be stacked with the
# splice-junction panel, which uses the same limits.
p1 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    data = filter(data, feature == "exon"),
    aes(xmin = start, xmax = stop, y = gene, fill = gene),
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    colour = "gray"
  ) +
  ggthemes::theme_few() +
  ggthemes::scale_fill_ptol() +
  geom_segment(
    data = filter(data, feature == "intron"),
    aes(x = start, xend = stop, y = gene, yend = gene),
    linetype = "dotted",
    colour = "gray",
    size = 1
  ) +
  xlim(9241000, 9257900) +
  labs(x = "Chromosome 1", y = "Genes") +
  # The fill legend is suppressed; "none" is the supported spelling, while
  # FALSE is deprecated in current ggplot2.
  guides(fill = "none")

##splice junctions
# Splice-junction panel: every junction in `splJnc` is drawn as a thin bar at
# its own vertical position `y`, filled by its read count `n`. Axis text,
# ticks and the x title are removed, and the colour bar is placed on top.
p2 <- ggplot() +
  gggenes::geom_gene_arrow(
    mapping = aes(xmin = start, xmax = stop, y = y, fill = n),
    data = splJnc,
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrow_body_height = unit(1, "mm"),
    colour = "gray"
  ) +
  xlim(9241000, 9257900) +
  viridis::scale_fill_viridis() +
  guides(
    fill = guide_colourbar(
      title = "Reads",
      title.position = "top",
      title.hjust = 0.5
    )
  ) +
  labs(y = "Splice junctions") +
  ggthemes::theme_few() +
  theme(
    legend.position = "top",
    axis.title.x = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )

# Convert both plots to grobs and stack them with the splice-junction panel
# (p2) above the gene panel (p1), then draw on a fresh page.
# resize_heights() is defined outside this file; it is given the stacked grob
# and the "null" units 3 and 1 (presumably relative panel heights -- verify).
grobz <- list(ggplotGrob(p1), ggplotGrob(p2))
g <- rbind(grobz[[2]], grobz[[1]], size = "last")
grid.newpage()
grid.draw(resize_heights(g, unit(c(3, 1), "null")))

#plot pdf
#genes
# PDF version of the gene-model panel: same layers as the on-screen version
# but with thinner arrow bodies and lines and 6 pt text.
p1 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    data = filter(data, feature == "exon"),
    aes(xmin = start, xmax = stop, y = gene, fill = gene),
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrow_body_height = grid::unit(1, "mm"),
    colour = "gray",
    lwd = 0.1
  ) +
  ggthemes::theme_few() +
  ggthemes::scale_fill_ptol() +
  geom_segment(
    data = filter(data, feature == "intron"),
    aes(x = start, xend = stop, y = gene, yend = gene),
    linetype = "dotted",
    colour = "gray",
    size = 0.25
  ) +
  xlim(9241000, 9257900) +
  labs(x = "Chromosome 1", y = "") +
  # The fill legend is suppressed; "none" is the supported spelling, while
  # FALSE is deprecated in current ggplot2.
  guides(fill = "none") +
  theme(
    axis.text = element_text(size = 6),
    axis.title.x = element_text(size = 6),
    # margin() takes t, r, b, l and then `unit`, so the unit has to be passed
    # by name; an unnamed unit string would be matched to `t` instead. Points
    # are used here (8 pt right margin) -- NOTE(review): confirm pt is the
    # intended unit.
    axis.title.y = element_text(size = 6, margin = margin(r = 8, unit = "pt")),
    axis.ticks.length = unit(0.5, "mm"),
    axis.ticks = element_line(size = 0.15),
    panel.border = element_rect(fill = NA, size = 0.1),
    plot.margin = unit(c(0.01, 1, 0.01, 1), "mm")
  )

#splice junctions
# PDF version of the splice-junction panel: borderless 2 pt bars filled by
# read count `n`, small text sizes and a compact colour bar on top.
p2 <- ggplot() +
  gggenes::geom_gene_arrow(
    mapping = aes(xmin = start, xmax = stop, y = y, fill = n),
    data = splJnc,
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrow_body_height = unit(2, "pt"),
    colour = NA,
    lwd = 0.1,
    show.legend = TRUE
  ) +
  xlim(9241000, 9257900) +
  viridis::scale_fill_viridis() +
  guides(
    fill = guide_colourbar(
      title = "Read count",
      title.position = "top",
      title.hjust = 0.5,
      barheight = unit(4, "pt"),
      barwidth = unit(50, "pt")
    )
  ) +
  labs(y = "Spliced reads") +
  ggthemes::theme_few() +
  theme(
    # legend
    legend.position = "top",
    legend.title = element_text(size = 7),
    legend.text = element_text(size = 6),
    legend.box.spacing = unit(3, "pt"),
    # axes
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 6),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    # panel and margins
    panel.border = element_rect(fill = NA, size = 0.1),
    plot.margin = unit(c(0.01, 1, 0.01, 1), "mm"),
    strip.text.y = element_text(angle = 0)
  )

# Stack the PDF panels with splice junctions (p2) above the gene models (p1).
# resize_heights() is defined outside this file; it is given the stacked grob
# and the "null" units 6 and 1 (presumably relative panel heights -- verify).
grobz <- list(ggplotGrob(p1), ggplotGrob(p2))
g <- resize_heights(
  rbind(grobz[[2]], grobz[[1]], size = "last"),
  unit(c(6, 1), "null")
)

# path <- file.path("~/GitHub/miR34a_asRNA_project/inst", fileMap(type = "pdf")["Supplementary Document 1b"][[1]])
# ggsave(
#   plot = g,
#   filename = path,
#   device = cairo_pdf,
#   height = 60,
#   width = 83,
#   units = "mm"
# )
Splice junctions (top) detected in RNAseq data from the ENCODE project and antisense genes (bottom) at the miR34a locus. Splice junctions are colored according to the corresponding total read count. In cases where the exact same splice junction (identical start and stop coordinates) was detected multiple times, the read counts were summed.



Conclusions

Multiple splice junctions correspond to the miR34a asRNA transcript and its isoforms (Supplementary Fig 2c.), with the most highly expressed junctions corresponding to the annotated miR34a asRNA transcript. Despite this, no splice junctions appear to correspond to the intron in the lnc34a transcript. This potentially indicates that lnc34a is tightly regulated and specifically expressed in the CCSC context, as claimed by the authors. An alternative interpretation could be that lnc34a is expressed in a subset of the examined cell lines, although at levels too low to be detected. Finally, lnc34a may not be expressed in any of the cell lines examined.





# Report the R version and the attached/loaded packages used for this document.
sessionInfo()


GranderLab/miR34a_asRNA_project documentation built on May 26, 2019, 7:26 a.m.