packages <- c( "tidyverse", "printr", "ggthemes", "readr", "miR34AasRNAproject", "grid", "gtable" ) purrr::walk(packages, library, character.only = TRUE) rm(packages)
We interogated RNA sequencing data from TCGA to determine in miR34a and miR34a asRNA expression was correlated in both TP53 wild type and mutated samples.
RNA-Seq data and copy number data were downloaded from TCGA and processed as described previously (Ashouri et al. 2016). Briefly, RNA-Seq data were aligned to the human hg19 assembly and quantified using GENCODE (v19) annotated HTSeq-counts and FPKM normalizations. Expression data from miR34a and miR34 asRNA (identified as RP3-510D11.2) were used for further analysis. Copy number amplitudes for GENCODE genes were determined from segmented copy-number data. Samples that were diploid for miR34 asRNA were identified as those samples that had copy number amplitudes between -0.1 and 0.1.
Somatic mutation data were downloaded from the Genomics Data Commons data portal (GDC) as mutation annotation format (maf) files, called using Mutect2 on 30/10/2017 (v7) (Grossman et al. 2016).
data <- getData("Supplementary Figure 1b") #normalize expression values data <- mutate( data, RP3 = log2(RP3 / max(RP3)), miR34a = log2(miR34a / max(miR34a)) ) #add BRCA subtypes data <- data %>% mutate(PAM50 = if_else(is.na(PAM50), "", PAM50)) %>% mutate(cancer_PAM50 = gsub("BRCA", "BRCA ", paste(cancer, PAM50, sep = ""))) #sort data <- arrange(data, cancer_PAM50, TP53, RP3) #remove infinite data data <- filter(data, !is.infinite(RP3) & !is.infinite(miR34a)) #diploid only data <- filter(data, abs(RP3_cna) < 0.1) ##prepare for plotting data <- data %>% mutate(TP53 = if_else(TP53 == 0, "TP53 wt", "TP53 mut")) %>% select(TCGA_cancer_id, TP53, cancer_PAM50, RP3, miR34a) %>% gather(gene, value, -(TCGA_cancer_id:cancer_PAM50)) %>% mutate(gene = if_else(gene == "RP3", "lncTAM34a", "miR34a")) #remove cancers that don't have at least 5 patients in both TP53 wt and mutated data <- data %>% group_by(cancer_PAM50, TP53, gene) %>% filter(n() > 5) %>% group_by(cancer_PAM50, gene) %>% filter(length(unique(TP53)) == 2) %>% ungroup() #calculate stats stats <- data %>% group_by(cancer_PAM50, gene, TP53) %>% summarize(value = list(value)) %>% spread(TP53, value) %>% mutate( median_wt = median(unlist(`TP53 wt`)), median_mut = median(unlist(`TP53 mut`)), CI95l_wt = t.test(unlist(`TP53 wt`))$conf.int[1], CI95l_mut = t.test(unlist(`TP53 mut`))$conf.int[1], CI95h_wt = t.test(unlist(`TP53 wt`))$conf.int[2], CI95h_mut = t.test(unlist(`TP53 mut`))$conf.int[2], pValue = wilcox.test(unlist(`TP53 wt`), unlist(`TP53 mut`), alternative = "two.sided")$p.value ) %>% ungroup() %>% gather(metric, value, -(cancer_PAM50:`TP53 wt`), -pValue) %>% separate(col = metric, into = c("metric", "TP53"), sep = "_") %>% spread(metric, value) %>% mutate(TP53 = if_else(TP53 == "wt", "TP53 wt", "TP53 mut")) %>% pFormat(.) #add fake data where p53 mut is missing (will be hidden in plot) to keep empty space data <- data %>% mutate(TP53 = parse_factor(TP53, levels = c("TP53 wt", "TP53 mut"))) %>% mutate(gene = parse_factor(gene, levels = c("lncTAM34a", "miR34a"))) %>% mutate(cancer_PAM50 = parse_factor(cancer_PAM50, levels = rev(unique(cancer_PAM50)))) stats <- stats %>% mutate(TP53 = parse_factor(TP53, levels = c("TP53 wt", "TP53 mut"))) %>% mutate(gene = parse_factor(gene, levels = c("lncTAM34a", "miR34a"))) %>% mutate(cancer_PAM50 = parse_factor(cancer_PAM50, levels = rev(unique(cancer_PAM50))))
ggplot(data = NULL) + geom_jitter( data = data, aes( x = cancer_PAM50, y = value, colour = TP53 ), position = position_dodge(width = 0.75), alpha = 0.5 ) + geom_text( data = stats, aes( x = cancer_PAM50, y = 1, label = pFormat ) ) + geom_point( data = stats, aes( x = cancer_PAM50, y = median, group = TP53 ), colour = "grey30", shape = 124, size = 4, lwd = 1, position = position_dodge(width = 0.75) ) + facet_wrap(~gene, ncol = 2) + coord_flip(ylim = c(-12, 2.05)) + labs( x = "Cancer", y = "Expression" ) + theme_few() + scale_colour_economist() + theme(legend.position = "top") + guides(colour = guide_legend(title = "TP53 status")) #plot pdf figure <- ggplot(data = NULL) + geom_jitter( data = data, aes( x = cancer_PAM50, y = value, colour = TP53 ), position = position_dodge(width = 0.75), alpha = 0.5, size = 1 ) + geom_text( data = stats, aes( x = cancer_PAM50, y = 1, label = pFormat ), size = 3.25 ) + geom_point( data = stats, aes( x = cancer_PAM50, y = median, group = TP53 ), colour = "grey30", shape = 124, size = 4, lwd = 1, position = position_dodge(width = 0.75) ) + facet_wrap(~gene, ncol = 2) + coord_flip(ylim = c(-12, 2.05)) + labs( x = "Cancer", y = "Expression" ) + theme_few() + scale_colour_economist() + theme( plot.margin = unit(rep(0.1, 4), "lines"), legend.position = "top", legend.background = element_blank(), legend.box.spacing = unit(5, "pt"), #this controls the spacing between strip.text.x and the legend legend.margin = margin(rep(0, 4), unit = "pt"), legend.key.size = unit(1/5, "cm"), legend.title = element_text(size = 9), legend.text = element_text(size = 7), axis.title.y = element_blank(), axis.title.x = element_text(size = 9, margin = margin(t = 5)), axis.text.y = element_text(size = 10), axis.text.x = element_text(size = 10), axis.ticks = element_line(size = 0.25), axis.ticks.length = unit(1/15, "cm"), panel.border = element_rect(fill = NA, size = 0.15) ) + guides(colour = guide_legend(title = "TP53 status", override.aes = list(size = 3, alpha = 1))) ##save to pdf # path <- file.path("~/GitHub/miR34a_asRNA_project/inst", fileMap(type = "pdf")["Figure 1-Supplement 1b"][[1]]) # ggsave( # plot = figure, # filename = path, # device = cairo_pdf, # height = 135, # width = 205, # units = "mm" # )
miR34a asRNA and miR34a tend to exhibit a decreased expression in cancer types examined in patients with TP53 mutations.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.