knitr::opts_chunk$set(echo = TRUE)
suppressPackageStartupMessages({
    library(MultiAssayExperiment)
    library(curatedTCGAData)
    library(TCGAutils)
    library(CNVRanger)
    library(BiocParallel)
})

Load packages:

library(MultiAssayExperiment)
library(curatedTCGAData)
library(TCGAutils)
library(CNVRanger)
library(BiocParallel)

Obtain ACC data from curatedTCGAData:

data("diseaseCodes")
head(avail <- diseaseCodes[
    diseaseCodes[["Available"]] == "Yes", "Study.Abbreviation"])
## Peaks data not available in LAML or SKCM
avail <- avail[!avail %in% c("LAML", "SKCM")]
# previous run with first 3
avail <- avail[-(1:3)]
.addCallStates <-
    function(mae, calls_assay, sample = "01", col = "Segment_Mean") {
    calls_assay <- paste0(sample, "_", calls_assay)
    state <- round(mcols(mae[[calls_assay]])[[col]])
    state[state > 2] <- 2
    state[state < -2] <- -2
    mcols(mae[[calls_assay]])$State <- state + 2
    mcols(mae[[calls_assay]]) <-
        mcols(mae[[calls_assay]])[
            c("State", "Segment_Mean", "Num_Probes")]
    mae
}

## add helper for datasets with duplicate rownames (weren't added via pipeline)
.fixRowRanges <- function(mae, cnvrs) {
    peakObj <- mae[[cnvrs]]
    if (is.null(rowRanges(peakObj))) {
        peakrow <- rowData(peakObj)
        rows <- peakrow[,
            grepl("gene|ranges", names(peakrow), ignore.case = TRUE)]
        if (length(rows)) {
            rowRanges(peakObj) <- as(rows, "GRanges")
            rowData(peakObj) <- peakrow
        }
    }
    mae[[cnvrs]] <- peakObj
    mae
}

MultiOmic Quality Control

# BiocManager::install("Bioconductor/MultiOmicQC")
library(MultiOmicQC)

eQTL Analysis

eQTL_pan <- function(ccode, sample = "01") {
    mae <- curatedTCGAData::curatedTCGAData(ccode,
        assays = c("GISTIC_Peaks", "CNVSNP", "RNASeq2GeneNorm"),
        dry.run = FALSE)
    scalls <- paste0(sample, "_", ccode, "_CNVSNP-20160128")
    rrnas <- paste0(sample, "_", ccode, "_RNASeq2GeneNorm-20160128",
        "_ranged")
    scnvrs <- paste0(sample, "_", ccode, "_GISTIC_Peaks-20160128")

    mae <- symbolsToRanges(mae, unmapped = FALSE)
    genome(mae) <- "hg19"
    seqlevelsStyle(mae) <- "UCSC"

    mae <- intersectColumns(mae)
    mae <- splitAssays(mae, sampleCodes=sample)
    mae <- .addCallStates(mae, paste0(ccode, "_CNVSNP-20160128"))
    mae <- .fixRowRanges(mae, scnvrs)
    CNVRanger::cnvExprAssoc(
        cnvrs = scnvrs,
        calls = scalls,
        rcounts = rrnas,
        data = mae
    )
}
param <- MulticoreParam(workers = 14, stop.on.error = FALSE)
avail <- setNames(avail, avail)
pancan <- bptry(
    bplapply(avail, eQTL_pan, BPPARAM = param)
)

save(pancan, file = "pancanres.rda")
pancan.redo <- bptry(
    bplapply(avail, eQTL_pan, BPREDO = pancan, BPPARAM = param)
)
save(pancan.redo, file = "pancan_second_try.rda")
## rinse and repeat
pancan.try2 <- bptry(
    bplapply(avail, eQTL_pan, BPREDO = pancan.redo, BPPARAM = param)
)

pancan.try2 <- setNames(pancan.try2, avail)

names(which(!bpok(pancan.try2)))
# [1] "DLBC" "KICH" "THYM"

save(pancan.try2, file = "pancan_third_try.rda")

about n = 100

permute sample labels



LiNk-NY/curatedTCGAManu documentation built on July 22, 2020, 4:06 p.m.