knitr::opts_chunk$set(echo = TRUE)
Load packages:
library(MultiAssayExperiment) library(curatedTCGAData) library(TCGAutils)
"In TCGA data of BRCA patients, find the DNA somatic mutations within the first 2000 bp outside of the genes that are both expressed with FPKM > 3 and have at least a methylation in the same patient biospecimen, and extract these mutations of the top 5% patients with the highest number of such mutations."
First we load the 'BRCA' dataset with 'Mutation', 'RNASeq2GeneNorm' and 'Methylation' assays / experiments:
system.time( brca <- curatedTCGAData( diseaseCode = "BRCA", assays = c("Mutation", "RNASeq2GeneNorm", "Methylation"), dry.run = FALSE ) )
We then add gene symbol annotations with genomic ranges and drop any unmapped symbols. We also do the same for CpG probe identifiers:
brca0 <- symbolsToRanges(brca, unmapped = FALSE) if (!require("IlluminaHumanMethylation450kanno.ilmn12.hg19", quietly = TRUE)) BiocManager::install("IlluminaHumanMethylation450kanno.ilmn12.hg19") library(IlluminaHumanMethylation450kanno.ilmn12.hg19) brca1 <- CpGtoRanges(brca0, unmapped = FALSE)
Select only tumors in the data
sampleTables(brca1) head(sampleTypes) sampleselector <- lapply(colnames(brca1), function(x) TCGAsampleSelect(x, c("01", "06")) ) brca1 <- brca1[, sampleselector]
We check what probes "have at least a methylation..." across all samples:
meth.has.genes <- !is.na(rowData( brca1[["BRCA_Methylation_methyl450-20160128_ranged"]])$Gene_Symbol) brca1 <- subsetByRow(brca1, List("BRCA_Methylation_methyl450-20160128_ranged"=meth.has.genes)) genes <- rowData(brca1[["BRCA_Methylation_methyl450-20160128_ranged"]])$Gene_Symbol methylated <- !is.na(as.matrix( assay(brca1[["BRCA_Methylation_methyl450-20160128_ranged"]]) ))
Number of non-missing per gene in each column
# convert to integer first meth <- methylated * 1L system.time(meth1 <- rowsum(meth, group = genes)) # where genes have at least one methylation meth2 <- meth1 > 0L brca1 <- c(brca1, has.meth = SummarizedExperiment(meth2))
Get ranges and add "within the first 2000 bp outside of the genes"
rnaranges <- rowRanges(brca1[["BRCA_RNASeq2GeneNorm-20160128_ranged"]]) allROIs <- flank(rnaranges, 2000, both = TRUE)
Find any somatic mutations within regions of interest:
somatic <- function(scores, ranges, qranges) any(!is.na(scores)) genome(brca1[["BRCA_Mutation-20160128"]]) <- TCGAutils::translateBuild(genome(brca1[["BRCA_Mutation-20160128"]])) mutations <- RaggedExperiment::qreduceAssay( x = brca1[["BRCA_Mutation-20160128"]], query = allROIs, simplifyReduce = somatic, i = "Variant_Classification" ) rownames(mutations) <- names(rnaranges) mutations[is.na(mutations)] <- 0 brca1 <- c(brca1, mutations = SummarizedExperiment(mutations))
Handle technical replicates and take the sum of number of mutations per sample
reps <- replicated(brca1)[["mutations"]] compreps <- mergeReplicates(assay(brca1[["mutations"]]), reps, sum) brca1[["mutations"]] <- SummarizedExperiment(compreps)
Resolving replicates in other assays
brca2 <- brca1[, , c("BRCA_RNASeq2GeneNorm-20160128_ranged", "has.meth", "mutations")] rnamerged <- mergeReplicates(brca2[["BRCA_RNASeq2GeneNorm-20160128_ranged"]], replicates = replicated(brca2[, , "BRCA_RNASeq2GeneNorm-20160128_ranged"])[[1L]], simplify = function (x) { gt3 <- x > 3 if (any(gt3)) BiocGenerics::mean(x[gt3]) else x[[1]] } ) brca2[["BRCA_RNASeq2GeneNorm-20160128_ranged"]] <- rnamerged methmerge <- mergeReplicates(brca2[["has.meth"]], replicates = replicated(brca2[, , "has.meth"])[[1L]], simplify = any ) mutmerge <- mergeReplicates(brca2[["mutations"]], replicates = replicated(brca2[, , "mutations"])[[1L]], simplify = sum ) brca2[["has.meth"]] <- methmerge brca2[["mutations"]] <- mutmerge brca3 <- as(brca2, "MatchedAssayExperiment") brca3 <- intersectRows(brca3)
An expression threshold is used where "FPKM > 3". In our case, we use the better alternative: TPM > 3
https://haroldpimentel.wordpress.com/2014/05/08/what-the-fpkm-a-review-rna-seq-expression-units/ http://diytranscriptomics.com/Reading/files/wagnerTPM.pdf
keep <- assay(brca3, "BRCA_RNASeq2GeneNorm-20160128_ranged") > 3 & assay(brca3, "has.meth") & assay(brca3, "mutations") summary(colSums(keep)) sort(colSums(keep), decreasing = TRUE)[seq_len(round(ncol(keep)*0.05))]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.