# inst/doc/TCGAutils.R

## ---- eval=FALSE--------------------------------------------------------------
#  if (!require("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("TCGAutils")

## ----include=TRUE,results="hide",message=FALSE,warning=FALSE------------------
library(TCGAutils)
library(curatedTCGAData)
library(MultiAssayExperiment)
library(RTCGAToolbox)
library(BiocFileCache)
library(rtracklayer)
library(R.utils)

## ----include=TRUE,results="hide",message=FALSE,warning=FALSE------------------
# Fetch the COAD data set for the listed assay name patterns and keep it as
# `coad`; every later chunk works on this object.
coad <- curatedTCGAData::curatedTCGAData(
    diseaseCode = "COAD",
    assays = c(
        "CNASeq", "Mutation", "miRNA*", "RNASeq2*", "mRNAArray", "Methyl*"
    ),
    dry.run = FALSE
)

## -----------------------------------------------------------------------------
# Same disease code with the catch-all assay pattern; `dry.run` is left at its
# default here.
curatedTCGAData(diseaseCode = "COAD", assays = "*")

## -----------------------------------------------------------------------------
# Tabulate the samples found in each assay of `coad`
sampleTables(coad)

## -----------------------------------------------------------------------------
# Load the sample type lookup table and preview it
data(list = "sampleTypes")
head(sampleTypes)

## -----------------------------------------------------------------------------
# Split the assays by the sample codes "01" and "11", then display the result
tnmae <- splitAssays(coad, c("01", "11"))
tnmae

## -----------------------------------------------------------------------------
# Coerce a subset of the split assays (positions 4, 6 and 7) to a
# MatchedAssayExperiment and display it
matchmae <- as(tnmae[, , c(4, 6, 7)], "MatchedAssayExperiment")
matchmae

## -----------------------------------------------------------------------------
getSubtypeMap(coad)

## -----------------------------------------------------------------------------
getClinicalNames("COAD")

## -----------------------------------------------------------------------------
# Compare the two `vital_status` columns: first their classes, then their
# value counts
coad_clinical <- colData(coad)

class(coad_clinical[["vital_status.x"]])
class(coad_clinical[["vital_status.y"]])

table(coad_clinical[["vital_status.x"]])
table(coad_clinical[["vital_status.y"]])

## -----------------------------------------------------------------------------
# Apply CpGtoRanges() to `coad` and keep the result as a separate object.
# NOTE(review): presumably converts CpG probe identifiers in the methylation
# assays to genomic ranges -- confirm against the TCGAutils documentation.
methcoad <- CpGtoRanges(coad)

## -----------------------------------------------------------------------------
# Apply mirToRanges() to `coad` and keep the result as a separate object.
# NOTE(review): presumably converts miRNA identifiers to genomic ranges --
# confirm against the TCGAutils documentation.
mircoad <- mirToRanges(coad)

## -----------------------------------------------------------------------------
# Name of the mutation assay inside `coad`; reused by the chunks below.
rag <- "COAD_Mutation-20160128"
# add the appropriate genome annotation
genome(coad[[rag]]) <- "NCBI36"
# change the style to UCSC
# (nested replacement: the row ranges are updated and written back into the
# assay, which is in turn written back into `coad`)
seqlevelsStyle(rowRanges(coad[[rag]])) <- "UCSC"

# inspect changes
seqlevels(rowRanges(coad[[rag]]))
genome(coad[[rag]])

## -----------------------------------------------------------------------------
# Lift the mutation assay's row ranges from hg18 to hg19.
#
# The UCSC chain file is kept in the BiocFileCache under the resource name
# "18to19chain" so that it is downloaded at most once.
lifturl <-
"http://hgdownload.cse.ucsc.edu/goldenpath/hg18/liftOver/hg18ToHg19.over.chain.gz"
bfc <- BiocFileCache()
qfile <- bfcquery(bfc, "18to19chain", exact = TRUE)[["rpath"]]
# Keep only cached copies that still exist on disk. The query can match more
# than one record, so the condition below must not feed a vector to `&&`
# (an error from R 4.3 on).
qfile <- qfile[file.exists(qfile)]
cfile <-
if (length(qfile) > 0L) {
    # reuse the first usable cached copy instead of querying the cache again
    qfile[[1L]]
} else {
    bfcadd(bfc, "18to19chain", lifturl)
}

# Decompress into the session temp directory, leaving the cached archive in
# place. Only a trailing ".gz" is stripped, and an existing decompressed copy
# is overwritten so the chunk can be re-run in the same session.
chainfile <- file.path(tempdir(), sub("\\.gz$", "", basename(cfile)))
R.utils::gunzip(cfile, destname = chainfile, remove = FALSE,
    overwrite = TRUE)

chain <- suppressMessages(
    rtracklayer::import.chain(chainfile)
)

ranges19 <- rtracklayer::liftOver(rowRanges(coad[[rag]]), chain)

## -----------------------------------------------------------------------------
# Lift the hg19 ranges on to hg38.
#
# The UCSC chain file is kept in the BiocFileCache under the resource name
# "19to38chain" so that it is downloaded at most once.
liftchain <-
"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz"
bfc <- BiocFileCache()
q38file <- bfcquery(bfc, "19to38chain", exact = TRUE)[["rpath"]]
# Keep only cached copies that still exist on disk. The query can match more
# than one record, so the condition below must not feed a vector to `&&`
# (an error from R 4.3 on).
q38file <- q38file[file.exists(q38file)]
c38file <-
if (length(q38file) > 0L) {
    # reuse the first usable cached copy instead of querying the cache again
    q38file[[1L]]
} else {
    bfcadd(bfc, "19to38chain", liftchain)
}

# Decompress into the session temp directory, leaving the cached archive in
# place. Only a trailing ".gz" is stripped, and an existing decompressed copy
# is overwritten so the chunk can be re-run in the same session.
cloc38 <- file.path(tempdir(), sub("\\.gz$", "", basename(c38file)))
R.utils::gunzip(c38file, destname = cloc38, remove = FALSE,
    overwrite = TRUE)

chain38 <- suppressMessages(
    rtracklayer::import.chain(cloc38)
)

## then use the liftOver function using the 'chain38' object
## as above

ranges38 <- rtracklayer::liftOver(unlist(ranges19), chain38)

## -----------------------------------------------------------------------------
# Replace the mutation assay with a copy whose row ranges are the hg19
# coordinates.
#
# `ranges19` holds one list element per original range. An element is empty
# when the range could not be lifted and holds several ranges when it was
# split across intervals. Only one-to-one mappings are kept, so that the
# flattened ranges line up exactly with the retained rows.
mapped <- lengths(ranges19) == 1L
re19 <- coad[[rag]][mapped]
ranges19 <- unlist(ranges19[mapped])
genome(ranges19) <- "hg19"
rowRanges(re19) <- ranges19
# replacement (same assay that `rag` names, rather than a repeated literal)
coad[[rag]] <- re19
rowRanges(re19)

## -----------------------------------------------------------------------------
# Run qreduceTCGA() on `coad` and overwrite `coad` with the result.
# NOTE(review): `keep.assay = TRUE` presumably retains the original ragged
# assays next to the reduced ones; a later chunk reads an assay whose name
# ends in "_simplified" -- confirm against the TCGAutils documentation.
coad <- qreduceTCGA(coad, keep.assay = TRUE)

## -----------------------------------------------------------------------------
# Display the result of symbolsToRanges() without modifying `coad`.
# NOTE(review): presumably maps gene symbols to genomic ranges -- confirm.
symbolsToRanges(coad)

## -----------------------------------------------------------------------------
library(GenomicDataCommons)

# Query the legacy file records, restricted to TCGA-COAD exon quantification
# files in the "Gene expression" data category.
queso <- filter(
    files(legacy = TRUE),
    ~ cases.project.project_id == "TCGA-COAD" &
        data_category == "Gene expression" &
        data_type == "Exon quantification"
)

# Keep downloaded GDC files in the session temp directory
gdc_set_cache(directory = tempdir())

## ----eval=FALSE---------------------------------------------------------------
#  ## FALSE until gdcdata works
#  qu <- manifest(queso)
#  qq <- gdcdata(qu$id[1:4])
#  
#  makeGRangesListFromExonFiles(qq, nrows = 4)

## -----------------------------------------------------------------------------
## Find the example file shipped in the package's "extdata" directory
pkgDir <- system.file("extdata", package = "TCGAutils", mustWork = TRUE)
exonFile <- list.files(
    path = pkgDir,
    pattern = "cation\\.txt$",
    full.names = TRUE
)
exonFile

## The original file prefix is needed to query for the UUID and get the
## TCGAbarcode
filePrefix <- "unc.edu.32741f9a-9fec-441f-96b4-e504e62c5362.1755371."

## Rebuild the actual file name manually and pass it alongside the path
exonNames <- paste0(filePrefix, basename(exonFile))
makeGRangesListFromExonFiles(exonFile, fileNames = exonNames)

## -----------------------------------------------------------------------------
# Read the example copy number table shipped with the package and preview it
grlFile <- system.file("extdata", "blca_cnaseq.txt", package = "TCGAutils")
grl <- read.table(file = grlFile)
head(grl)

# Build a GRangesList split by the "Sample" column ...
makeGRangesListFromCopyNumber(grl, split.field = "Sample")

# ... and once more, this time keeping the extra columns
makeGRangesListFromCopyNumber(
    grl,
    split.field = "Sample",
    keep.extra.columns = TRUE
)

## -----------------------------------------------------------------------------
# Download the COAD Firehose data (GISTIC on, clinical off) into the session
# temp directory
tempDIR <- tempdir()
co <- getFirehoseData(
    "COAD",
    clinical = FALSE,
    GISTIC = TRUE,
    destdir = tempDIR
)

# Show the GISTIC component and its class
gistic <- selectType(co, "GISTIC")
gistic
class(gistic)

makeSummarizedExperimentFromGISTIC(co, "Peaks")

## -----------------------------------------------------------------------------
# Build a one-column DataFrame holding `race` as a factor, keyed by the same
# row names as the colData, and merge it into `coad`
clinical <- colData(coad)
race_df <- DataFrame(
    race_f = factor(clinical[["race"]]),
    row.names = rownames(clinical)
)
mergeColData(coad, race_df)

## -----------------------------------------------------------------------------
# Take the first four barcodes of the simplified CNASeq assay, show them and
# translate them to UUIDs
cnaseq_barcodes <- colnames(coad)[["COAD_CNASeq-20160128_simplified"]]
xbarcode <- head(cnaseq_barcodes, 4L)
xbarcode
barcodeToUUID(xbarcode)

## -----------------------------------------------------------------------------
# Case UUID -> barcode
case_uuid <- "ae55b2d3-62a1-419e-9f9a-5ddfac356db4"
UUIDtoBarcode(case_uuid, from_type = "case_id")

## -----------------------------------------------------------------------------
# File UUID -> barcode
file_uuid <- "0001801b-54b0-4551-8d7a-d66fb59429bf"
UUIDtoBarcode(file_uuid, from_type = "file_id")

## -----------------------------------------------------------------------------
# Aliquot UUID -> barcode
aliquot_uuid <- "d85d8a17-8aea-49d3-8a03-8f13141c163b"
UUIDtoBarcode(aliquot_uuid, from_type = "aliquot_ids")

## -----------------------------------------------------------------------------
# Case UUID -> file UUIDs (first few only)
head(UUIDtoUUID("ae55b2d3-62a1-419e-9f9a-5ddfac356db4", to_type = "file_id"))

## -----------------------------------------------------------------------------
## The calls below all operate on `xbarcode`, the barcodes taken from the
## simplified CNASeq assay in an earlier chunk. Each call toggles which
## barcode sections TCGAbarcode() returns.

## Return participant barcodes
TCGAbarcode(xbarcode, participant = TRUE)

## Just return samples
TCGAbarcode(xbarcode, participant = FALSE, sample = TRUE)

## Include sample data as well
TCGAbarcode(xbarcode, participant = TRUE, sample = TRUE)

## Include portion and analyte data
TCGAbarcode(xbarcode, participant = TRUE, sample = TRUE, portion = TRUE)

## -----------------------------------------------------------------------------
## The second argument is a sample type code; see the `sampleTypes` table
## loaded earlier for the code-to-definition lookup.

## Select primary solid tumors
TCGAsampleSelect(xbarcode, "01")

## Select blood derived normals
TCGAsampleSelect(xbarcode, "10")

## -----------------------------------------------------------------------------
## Display the result of TCGAbiospec() for the same barcodes.
## NOTE(review): presumably a table of the biospecimen fields encoded in each
## barcode -- confirm against the TCGAutils documentation.
TCGAbiospec(xbarcode)

## -----------------------------------------------------------------------------
# Draw the oncoprint, using the mutation assay named by `rag`
oncoPrintTCGA(coad, matchassay = rag)

## -----------------------------------------------------------------------------
## Sample sections of the barcodes (same call as in an earlier chunk)
sampleCodes <- TCGAbarcode(xbarcode, participant = FALSE, sample = TRUE)

## Lookup table
head(sampleTypes)

## The first two characters of each sample section are the sample type code;
## look up the distinct codes in the table
observedCodes <- unique(substr(sampleCodes, start = 1L, stop = 2L))
codeRows <- match(observedCodes, sampleTypes[["Code"]])
sampleTypes[codeRows, ]

## -----------------------------------------------------------------------------
# Load the clinical variable names, print them, then show how many there are
# per element
data(list = "clinicalNames")

clinicalNames

lengths(clinicalNames)

## -----------------------------------------------------------------------------
sessionInfo()

# Try the TCGAutils package in your browser
#
# Any scripts or data that you put into this service are public.
#
# TCGAutils documentation built on April 17, 2021, 6:04 p.m.