packages <- c( "tidyverse", "grid", "gtable", "miR34AasRNAproject", "ggthemes", "printr", "stringr" ) purrr::walk(packages, library, character.only = TRUE) rm(packages)
Wang et al. have previously published on an antisense transcript arising from the miR34a locus that they named "lnc34a". Wang et al. state that this transcript is expressed at "significantly higher levels in colon cancer stem cells spheres (CCSCs)" compared to 9 commonly used colorectal cancer cell lines. After obtaining the sequence for lnc34a we could confirm that neither our primer walk analysis nor our 3' RACE detected the presence of the lnc34a transcript in the cell lines we had utilized. To further investigate the presence of lnc34a in non-CCSC cell lines we interrogated CAGE data from RIKEN to investigate the possibility of an alternative transcription start site upstream of the transcription start site we had characterized.
# Helper functions summarising the CAGE samples behind Supplementary Figure 8.
# Each helper re-reads the figure data through getData().

# Number of distinct values in the `cell` column of the figure data.
# NOTE(review): this reads the same dataset as cellLinesInFinal(), so the two
# counts reported in the text will always be equal -- confirm whether this was
# meant to count the cell lines in the unfiltered download instead.
nrCellLinesDown <- function() {
  figData <- getData("Supplementary Figure 8")
  length(unique(pull(figData, cell)))
}

# Distinct values of the `cell` column of the figure data.
cellLinesInFinal <- function() {
  figData <- getData("Supplementary Figure 8")
  unique(pull(figData, cell))
}

# Number of rows per RNA extraction method. The first occurrence of "total" in
# each `rna extract` value is relabelled to "total RNA" before counting.
RNAprepTable <- function() {
  figData <- getData("Supplementary Figure 8")
  relabelled <- mutate(
    figData,
    `rna extract` = str_replace(`rna extract`, "total", "total RNA")
  )
  extractOnly <- select(relabelled, `rna extract`)
  dplyr::count(extractOnly, `rna extract`)
}

# Number of rows per value of the `localization` column.
localizationTable <- function() {
  figData <- getData("Supplementary Figure 8")
  localizationOnly <- select(figData, localization)
  dplyr::count(localizationOnly, localization)
}

#add function to show polyA and total RNA table
All available CAGE data from r nrCellLinesDown()
cell lines was downloaded from UCSC with the script entitled "lnc34a CAGE" in the data-raw/saveRDS.R file. Of these r cellLinesInFinal() %>% length()
cell lines had CAGE transcription start sites mapping to the plus strand of chromosome 1 within the region spanning from 200 bp upstream of the lnc34a start site (9241796 - 200) to 200 bp downstream of the GENCODE annotated miR34a asRNA start site (9242263 + 200). These cell lines included: r cellLinesInFinal()
. All CAGE reads were plotted and the RPKM of the individual reads was used to color each read to indicate their relative abundance. In addition, a density plot shows the distribution of the CAGE reads in the specified interval. Further information concerning RNA extraction and localization of the samples included is shown below:
Number of samples from included RNA extraction methods:
# Print the table of sample counts per RNA extraction method.
RNAprepTable()
Number of samples from included RNA localizations:
# Print the table of sample counts per RNA localization.
localizationTable()
# Genes and primers ----
# Exon/intron models for the two transcripts; coordinates are the ones plotted
# on the "Chromosome 1" axis of the figure.
gdata <- tibble(
  gene = c(rep("miR34a asRNA", 5), rep("lnc34a", 3)),
  fill = gene,
  feature = c(
    "exon", "intron", "exon", "intron", "exon",
    "exon", "intron", "exon"
  ),
  start = c(
    9242262, 9242375, 9243691, 9243866, 9251451,
    9241796, 9242354, 9256971
  ),
  stop = c(
    9242375, 9243691, 9243866, 9251451, 9252148,
    9242354, 9256971, 9257102
  )
)

# Try to cut the data so that the CAGE reads are more visible in the figure:
# drop features starting at or beyond 9243000 and clip the remaining ones there.
# pmin() only shortens features that actually cross the cut-off, so a feature
# ending before it is never stretched.
gdata <- filter(gdata, start < 9243000) %>%
  mutate(stop = pmin(stop, 9243000))

# Add primer walk primers (F10-F15).
pw <- tibble(
  fill = paste0("F", 10:15),
  gene = "Primers",
  feature = rep("primer", 6),
  start = c(9242282, 9242170, 9242037, 9241944, 9241838, 9241734),
  stop = c(9242303, 9242192, 9242058, 9241967, 9241861, 9241757)
)

# Fix the factor level order of the rows on the gene track's y axis.
gdata <- bind_rows(gdata, pw) %>%
  mutate(
    gene = readr::parse_factor(
      gene,
      levels = c("miR34a asRNA", "lnc34a", "Primers")
    )
  )

# CAGE data ----
# Sum `level` over identical (start, stop) intervals, sort by decreasing level
# and give each interval its own row index `y` for plotting.
cage <- getData("Supplementary Figure 8") %>%
  select(start, stop, level) %>%
  group_by(start, stop) %>%
  summarize(level = sum(level)) %>%
  ungroup() %>%
  arrange(desc(level)) %>%
  # seq_len() stays correct for an empty table, where 1:n() would give c(1, 0).
  mutate(y = seq_len(n()))
# Shared x-axis window for all panels so that the stacked plots line up.
xRange <- c(9241596, 9243000)

### plot rmarkdown ----

# genes: exons and primers as boxes, introns as dotted segments, primer names
# as rotated labels.
p1 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    data = filter(gdata, feature %in% c("exon", "primer")),
    aes(xmin = start, xmax = stop, y = gene, fill = fill, group = gene),
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    colour = "gray"
  ) +
  geom_segment(
    data = filter(gdata, feature == "intron"),
    aes(x = start, xend = stop, y = gene, yend = gene, group = gene),
    linetype = "dotted",
    colour = "gray",
    size = 1
  ) +
  geom_text(
    data = filter(gdata, feature == "primer"),
    aes(x = start + 10, y = 2.5, label = fill),
    size = 3,
    angle = 90,
    colour = "grey20"
  ) +
  ggthemes::theme_few() +
  ggthemes::scale_fill_ptol() +
  xlim(xRange) +
  labs(x = "Chromosome 1", y = "Genes") +
  # "none" is the supported way to drop a legend; FALSE is deprecated.
  guides(fill = "none")

# cage: one row per CAGE interval, filled by `level` (legend titled "RPKM").
p2 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    data = cage,
    aes(xmin = start, xmax = stop, y = y, fill = level),
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrow_body_height = unit(1, "mm"),
    colour = "gray",
    show.legend = TRUE
  ) +
  ggthemes::theme_few() +
  viridis::scale_fill_viridis() +
  theme(
    axis.title.x = element_blank(),
    legend.position = "top",
    axis.text = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(y = "Cage reads") +
  xlim(xRange) +
  guides(
    fill = guide_colourbar(
      title = "RPKM",
      title.position = "top",
      title.hjust = 0.5
    )
  )

# density: distribution of CAGE interval start positions.
p3 <- ggplot(data = NULL) +
  geom_line(data = cage, aes(x = start), stat = "density") +
  ggthemes::theme_few() +
  xlim(xRange) +
  theme(
    axis.text = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(y = "Density")

# Stack the panels (cage, density, genes) and draw them.
grobz <- lapply(list(p1, p2, p3), ggplotGrob)
g <- rbind(grobz[[2]], grobz[[3]], grobz[[1]], size = "last")
grid.newpage()
grid.draw(resize_heights(g, unit(c(3, 1, 1), "null")))

### plot pdf ----
# Same three panels, restyled with smaller text and thinner lines.

# genes
p1 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    data = filter(gdata, feature %in% c("exon", "primer")),
    aes(xmin = start, xmax = stop, y = gene, fill = fill),
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrow_body_height = unit(1, "mm"),
    colour = "gray",
    lwd = 0.1
  ) +
  geom_segment(
    data = filter(gdata, feature == "intron"),
    aes(x = start, xend = stop, y = gene, yend = gene),
    linetype = "dotted",
    colour = "gray",
    size = 0.55
  ) +
  geom_text(
    data = filter(gdata, feature == "primer"),
    aes(x = start + 10, y = 2.55, label = fill),
    size = 1.75,
    angle = 90,
    colour = "grey20"
  ) +
  ggthemes::theme_few() +
  ggthemes::scale_fill_ptol() +
  xlim(xRange) +
  labs(x = "Chromosome 1", y = "Genes") +
  guides(fill = "none") +
  theme(
    axis.text = element_text(size = 6),
    axis.title.x = element_text(size = 6),
    # The original call was margin(r = 8, "cm"): the unnamed "cm" is matched to
    # `t`, not `unit`, so the right margin was in the default unit (points) and
    # the top margin was non-numeric. Spell out the 8 pt that was effectively
    # used; 8 cm would not fit the 90 mm wide figure requested in the
    # commented-out ggsave() call below.
    axis.title.y = element_text(size = 6, margin = margin(r = 8, unit = "pt")),
    axis.ticks.length = unit(0.5, "mm"),
    axis.ticks = element_line(size = 0.15),
    panel.border = element_rect(fill = NA, size = 0.1),
    plot.margin = unit(c(0.01, 1, 0.01, 1), "mm")
  )

# cage
p2 <- ggplot(data = NULL) +
  gggenes::geom_gene_arrow(
    data = cage,
    aes(xmin = start, xmax = stop, y = y, fill = level),
    arrowhead_width = unit(0, "mm"),
    arrowhead_height = unit(0, "mm"),
    arrow_body_height = unit(1, "mm"),
    colour = NA,
    lwd = 0.1,
    show.legend = TRUE
  ) +
  ggthemes::theme_few() +
  viridis::scale_fill_viridis() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 6),
    legend.position = "top",
    legend.title = element_text(size = 5),
    legend.text = element_text(size = 4),
    legend.box.spacing = unit(3, "pt"),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.border = element_rect(fill = NA, size = 0.1),
    plot.margin = unit(c(0.01, 1, 0.01, 1), "mm"),
    strip.text.y = element_text(angle = 0)
  ) +
  labs(y = "Cage reads") +
  xlim(xRange) +
  guides(
    fill = guide_colourbar(
      title = "RPKM",
      title.position = "top",
      title.hjust = 0.5,
      barheight = unit(2, "mm")
    )
  )

# density
p3 <- ggplot(data = NULL) +
  geom_line(
    data = cage,
    aes(x = start),
    stat = "density",
    lwd = 0.1
  ) +
  ggthemes::theme_few() +
  xlim(xRange) +
  theme(
    axis.text = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 6),
    axis.ticks = element_blank(),
    panel.border = element_rect(fill = NA, size = 0.1)
    #plot.margin = unit(c(0, 1, 2, 1), "mm")
  ) +
  labs(y = "Density")

# Stack the panels (cage, density, genes) with relative heights 5:1:2.
grobz <- lapply(list(p1, p2, p3), ggplotGrob)
g <- rbind(grobz[[2]], grobz[[3]], grobz[[1]], size = "last")
g <- resize_heights(g, unit(c(5, 1, 2), "null"))
#grid.draw(g)

# path <- file.path("~/GitHub/miR34a_asRNA_project/inst", fileMap(type = "pdf")["Supplementary Figure 8"][[1]])
# ggsave(
#   plot = g,
#   filename = path,
#   device = cairo_pdf,
#   height = 100,
#   width = 90,
#   units = "mm"
# )
The results show a high density of CAGE tags aligning to the region corresponding to the annotated miR34a asRNA start site. Several additional peaks, albeit with a much lower average expression, align slightly upstream of the annotated miR34a asRNA start site, one of which corresponds to the upstream start site detected in our primer walk analysis (Figure 1e). Despite this, we find no CAGE tags aligning at the transcription start site or upstream of the transcription start site of the lnc34a transcript. This potentially indicates that lnc34a is tightly regulated and specifically expressed in the CCSC context, as claimed by the authors. An alternative interpretation could be that lnc34a expression is present in a subset of the examined cell lines although at levels too low to be detected. Finally, lnc34a may not be 5'-capped, precluding its detection by CAGE.
# Print the R version, platform and attached/loaded package versions.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.