# inst/doc/MultiAssayExperiment.R

## ---- eval = FALSE------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("MultiAssayExperiment")

## ----include=TRUE,results="hide",message=FALSE,warning=FALSE------------------
library(MultiAssayExperiment)
library(GenomicRanges)
library(SummarizedExperiment)
library(RaggedExperiment)

## -----------------------------------------------------------------------------
# Call the constructor with no arguments and keep the result for inspection.
empty <- MultiAssayExperiment()
# Auto-print the object.
empty
# List the names of the S4 slots of the object.
slotNames(empty)

## ---- echo = FALSE, fig.cap = "MultiAssayExperiment object schematic shows the design of the infrastructure class. The colData provides data about the patients, cell lines, or other biological units, with one row per unit and one column per variable. The experiments are a list of assay datasets of arbitrary class, with one column per observation. The sampleMap links a single table of patient data (colData) to a list of experiments via a simple but powerful table of experiment:patient edges (relationships), that can be created automatically in simple cases or in a spreadsheet if assay-specific sample identifiers are used. sampleMap relates each column (observation) in the assays (experiments) to exactly one row (biological unit) in colData; however, one row of colData may map to zero, one, or more columns per assay, allowing for missing and replicate assays. Green stripes indicate a mapping of one subject to multiple observations across experiments.", out.width = "\\maxwidth"----
# Embed the schematic image described by the chunk's fig.cap option.
knitr::include_graphics("MultiAssayExperiment.png")

## -----------------------------------------------------------------------------
# Inspect the class of the experiments container of the empty object.
class(experiments(empty)) # ExperimentList

## -----------------------------------------------------------------------------
# Per-patient annotations; the row names carry the patient identifiers.
patient.data <- data.frame(
  sex = rep(c("M", "F"), times = 2),
  age = seq(38L, 41L),
  row.names = c("Jack", "Jill", "Bob", "Barbara")
)
patient.data

## -----------------------------------------------------------------------------
# Check the sampleMap of the empty object against the "DataFrame" class.
is(sampleMap(empty), "DataFrame") # TRUE

## -----------------------------------------------------------------------------
# Two random matrices (4 x 4 and 4 x 3) whose column names overlap only
# partially ("Jack" and "Bob" occur in both).
# NOTE(review): no set.seed() call is visible, so the values and the sampled
# row identifiers presumably differ between runs -- confirm this is intended.
exprss1 <- matrix(rnorm(16), ncol = 4,
        dimnames = list(sprintf("ENST00000%i", sample(288754:290000, 4)),
                c("Jack", "Jill", "Bob", "Bobby")))
exprss2 <- matrix(rnorm(12), ncol = 3,
        dimnames = list(sprintf("ENST00000%i", sample(288754:290000, 4)),
                c("Jack", "Jane", "Bob")))
doubleExp <- list("methyl 2k"  = exprss1, "methyl 3k" = exprss2)
# Build the object from the experiments list alone; no colData or sampleMap
# argument is supplied here.
simpleMultiAssay <- MultiAssayExperiment(experiments=doubleExp)
simpleMultiAssay

## -----------------------------------------------------------------------------
# Inspect the colData of an object built without an explicit colData argument.
colData(simpleMultiAssay)

## -----------------------------------------------------------------------------
# Same experiments list, now with patient.data supplied as colData.
simpleMultiAssay2 <- MultiAssayExperiment(experiments=doubleExp,
                                          colData=patient.data)
simpleMultiAssay2
colData(simpleMultiAssay2)

## -----------------------------------------------------------------------------
# Class of the object-level metadata of the empty object.
class(metadata(empty)) # NULL (class "ANY")

## -----------------------------------------------------------------------------
# Metadata of the experiments container rather than of the whole object.
metadata(experiments(empty))

## ---- message=FALSE-----------------------------------------------------------
# Expression matrix: 2 transcripts (rows) by 4 arrays (columns), filled
# column-wise with 101..108.
arraydat <- matrix(
  101:108,
  nrow = 2,
  dimnames = list(
    c("ENST00000294241", "ENST00000355076"),
    paste0("array", 1:4)
  )
)
arraydat

# One random covariate per array, keyed by array name.
coldat <- data.frame(
  slope53 = rnorm(4),
  row.names = paste0("array", 1:4)
)

# Combine the matrix and its column annotations in a SummarizedExperiment.
exprdat <- SummarizedExperiment(arraydat, colData = coldat)
exprdat

## -----------------------------------------------------------------------------
# Map each array column to a patient. Rows 1, 2, 4, 3 of patient.data are
# used, so the fourth patient is paired with "array3" and the third with
# "array4".
exprmap <- data.frame(
  primary = rownames(patient.data)[c(1, 2, 4, 3)],
  colname = paste0("array", 1:4),
  stringsAsFactors = FALSE
)
exprmap

## -----------------------------------------------------------------------------
# Methylation matrix: 2 transcripts by 5 samples, filled column-wise with 1..10.
methyldat <- matrix(
  seq_len(10),
  nrow = 2,
  dimnames = list(
    c("ENST00000355076", "ENST00000383706"),
    paste0("methyl", 1:5)
  )
)
methyldat

## -----------------------------------------------------------------------------
# Patient-to-column map for the methylation matrix; "Jack" appears twice.
methylmap <- data.frame(
  primary = c("Jack", "Jack", "Jill", "Barbara", "Bob"),
  colname = paste0("methyl", 1:5),
  stringsAsFactors = FALSE
)
methylmap

## -----------------------------------------------------------------------------
# microRNA matrix: 4 features by 3 samples, filled column-wise with 201..212.
microdat <- matrix(
  201:212,
  nrow = 4,
  dimnames = list(
    c("hsa-miR-21", "hsa-miR-191", "hsa-miR-148a", "hsa-miR148b"),
    paste0("micro", 1:3)
  )
)
microdat

## -----------------------------------------------------------------------------
# Patient-to-column map for the microRNA matrix.
micromap <- data.frame(
  primary = c("Jack", "Barbara", "Bob"),
  colname = paste0("micro", 1:3),
  stringsAsFactors = FALSE
)
micromap

## -----------------------------------------------------------------------------
# Simulate a small ranged SummarizedExperiment: nrows features by ncols samples.
# NOTE(review): no set.seed() call is visible, so counts, range starts and
# strands presumably differ between runs -- confirm this is intended.
nrows <- 5; ncols <- 4
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
rowRanges <- GRanges(rep(c("chr1", "chr2"), c(2, nrows - 2)),
    IRanges(floor(runif(nrows, 1e5, 1e6)), width=100),
    strand=sample(c("+", "-"), nrows, TRUE),
    # Zero-padded identifiers "ID001", "ID002", ...; a backslash in the format
    # string would be copied literally into every identifier.
    feature_id=sprintf("ID%03d", seq_len(nrows)))
# One letter per range, tied to nrows rather than a hard-coded count.
names(rowRanges) <- letters[seq_len(nrows)]
colData <- DataFrame(Treatment=rep(c("ChIP", "Input"), 2),
    row.names= c("mysnparray1", "mysnparray2", "mysnparray3", "mysnparray4"))
rse <- SummarizedExperiment(assays=SimpleList(counts=counts),
    rowRanges=rowRanges, colData=colData)

## -----------------------------------------------------------------------------
# Patient-to-column map for the four "mysnparray" samples.
rangemap <- data.frame(
  primary = c("Jack", "Jill", "Bob", "Barbara"),
  colname = paste0("mysnparray", 1:4),
  stringsAsFactors = FALSE
)
rangemap

## -----------------------------------------------------------------------------
# Collect the four per-assay maps in a list named by assay.
listmap <- list(
  "Affy" = exprmap,
  "Methyl 450k" = methylmap,
  "Mirna" = micromap,
  "CNV gistic" = rangemap
)
listmap

## -----------------------------------------------------------------------------
# Convert the named list of maps into a single map object.
dfmap <- listToMap(listmap)
dfmap

## ---- eval=FALSE--------------------------------------------------------------
#  mapToList(dfmap, "assay")

## -----------------------------------------------------------------------------
# Assay data objects, named with the same assay names as the maps in listmap.
objlist <- list(
  "Affy" = exprdat,
  "Methyl 450k" = methyldat,
  "Mirna" = microdat,
  "CNV gistic" = rse
)

## -----------------------------------------------------------------------------
# Assemble the full object from experiments, patient annotations and map.
myMultiAssay <- MultiAssayExperiment(
  experiments = objlist,
  colData = patient.data,
  sampleMap = dfmap
)
myMultiAssay

## -----------------------------------------------------------------------------
# Print each component of the assembled object in turn.
experiments(myMultiAssay)
colData(myMultiAssay)
sampleMap(myMultiAssay)
metadata(myMultiAssay)

## -----------------------------------------------------------------------------
# Copy the experiment list and strip its names; the outer parentheses print
# the assigned value (NULL).
objlist3 <- objlist
(names(objlist3) <- NULL)

# try() lets the script continue if prepMultiAssay() signals an error; the
# error message is written to stdout via outFile.
try(prepMultiAssay(objlist3, patient.data, dfmap)$experiments,
    outFile = stdout())

## -----------------------------------------------------------------------------
# Upper-cased names no longer match the assay names in dfmap character for
# character; print both sets before calling prepMultiAssay() again.
names(objlist3) <- toupper(names(objlist))
names(objlist3)
unique(dfmap[, "assay"])
prepMultiAssay(objlist3, patient.data, dfmap)$experiments

## -----------------------------------------------------------------------------
# Reuse the sampleMap of simpleMultiAssay2 as the example map.
exampleMap <- sampleMap(simpleMultiAssay2)
# lapply() always returns a named list of column-name vectors; sapply() would
# switch to a matrix whenever the experiments had equal column counts.
lapply(doubleExp, colnames)
exampleMap
# Inspect the `drops` entry of the metadata returned by prepMultiAssay().
prepMultiAssay(doubleExp, patient.data, exampleMap)$metadata$drops

## -----------------------------------------------------------------------------
# Append a map row whose assay ("New methyl") is not a name in objlist and
# whose patient ("Joe") is not a row of patient.data.
exMap <- rbind(dfmap,
    DataFrame(assay = "New methyl", primary = "Joe",
        colname = "Joe"))
# invisible() suppresses auto-printing of the returned value.
invisible(prepMultiAssay(objlist, patient.data, exMap))

## -----------------------------------------------------------------------------
# Keep the prepared components and pass them to the constructor positionally.
prepped <- prepMultiAssay(objlist, patient.data, exMap)
preppedMulti <- MultiAssayExperiment(prepped$experiments, prepped$colData,
    prepped$sampleMap, prepped$metadata)
preppedMulti

## -----------------------------------------------------------------------------
# Alternative: hand the elements of `prepped` to the constructor via do.call().
do.call(MultiAssayExperiment, prepped)

## -----------------------------------------------------------------------------
# Range records on chr1, one per row, assigned to specimens a, b and c.
grlls <- list(
  chr = rep("chr1", nrows),
  start = 11:15,
  end = 12:16,
  strand = c("+", "-", "+", "*", "*"),
  score = 1:5,
  specimen = c("a", "a", "b", "b", "c"),
  gene_symbols = paste0("GENE", letters[seq_len(nrows)])
)

grldf <- as.data.frame(grlls, stringsAsFactors = FALSE)

# Split the records by specimen and name the ranges by gene symbol.
GRL <- makeGRangesListFromDataFrame(
  grldf,
  split.field = "specimen",
  names.field = "gene_symbols"
)

## -----------------------------------------------------------------------------
# Build a RaggedExperiment from the per-specimen ranges.
RaggedExperiment(GRL)

## -----------------------------------------------------------------------------
# Range columns plus three expression columns, all on chr2.
sels <- list(
  chr = rep("chr2", nrows),
  start = 11:15,
  end = 12:16,
  strand = c("+", "-", "+", "*", "*"),
  expr0 = 3:7,
  expr1 = 8:12,
  expr2 = 12:16
)
# Row names are the gene labels in reverse letter order.
sedf <- as.data.frame(
  sels,
  row.names = paste0("GENE", letters[rev(seq_len(nrows))]),
  stringsAsFactors = FALSE
)
sedf
makeSummarizedExperimentFromDataFrame(sedf)

## ---- eval=FALSE--------------------------------------------------------------
#  myMultiAssay[rows, columns, assays]

## -----------------------------------------------------------------------------
# First subscript only: select the feature "ENST00000355076", leaving the
# column and assay subscripts empty.
myMultiAssay["ENST00000355076", , ]

## -----------------------------------------------------------------------------
# All three subscripts at once: one feature, positions 1:2 in the column
# subscript, and two assays selected by name.
myMultiAssay["ENST00000355076", 1:2, c("Affy", "Methyl 450k")]

## -----------------------------------------------------------------------------
# Three spellings of the second (column) subscript: by name, by position, and
# by a logical vector with one entry per row of patient.data.
myMultiAssay[, "Jack", ]
myMultiAssay[, 1, ]
myMultiAssay[, c(TRUE, FALSE, FALSE, FALSE), ]

## -----------------------------------------------------------------------------
# Three spellings of the third (assay) subscript selecting "Mirna", the third
# of the four experiments: by name, by position, and by logical vector.
myMultiAssay[, , "Mirna"]
myMultiAssay[, , 3]
# One logical entry per experiment (four in total).
myMultiAssay[, , c(FALSE, FALSE, TRUE, FALSE)]

## -----------------------------------------------------------------------------
# Same feature subset with `drop` set explicitly, first to FALSE and then to
# TRUE; compare the two printed results.
myMultiAssay["ENST00000355076", , , drop=FALSE]

## -----------------------------------------------------------------------------
myMultiAssay["ENST00000355076", , , drop=TRUE]

## -----------------------------------------------------------------------------
# Print the patient-level annotations used for the column subsets below.
colData(myMultiAssay)

## -----------------------------------------------------------------------------
# With two subscripts, 1:2 sits in the second (column) position.
myMultiAssay[, 1:2]

## -----------------------------------------------------------------------------
# Logical column subscript derived from the `sex` variable accessed with `$`.
malesMultiAssay <- myMultiAssay[, myMultiAssay$sex == "M"]
colData(malesMultiAssay)

## -----------------------------------------------------------------------------
# Keep the colnames() result; it is indexed by assay name below.
allsamples <- colnames(myMultiAssay)
allsamples

## -----------------------------------------------------------------------------
# `-3:-5` expands to c(-3, -4, -5), dropping the 3rd through 5th entries of
# the "Methyl 450k" element; the edited object is then passed as a list in the
# column subscript.
allsamples[["Methyl 450k"]] <- allsamples[["Methyl 450k"]][-3:-5]
myMultiAssay[, as.list(allsamples), ]
subsetByColumn(myMultiAssay,  as.list(allsamples))  #equivalent

## -----------------------------------------------------------------------------
# Select two assays by name.
myMultiAssay[, , c("Affy", "CNV gistic")]

## -----------------------------------------------------------------------------
# Flag the experiments whose name contains "CNV".
is.cnv <- grepl("CNV", names(experiments(myMultiAssay)))
is.cnv
myMultiAssay[, , is.cnv]  #logical subsetting
myMultiAssay[, , which(is.cnv)] #integer subsetting

## -----------------------------------------------------------------------------
# Row subscript given as a list named by assay: positions 1:2 for "Mirna".
myMultiAssay[list(Mirna = 1:2), , ]
## equivalently
subsetByRow(myMultiAssay, list(Mirna = 1:2))

## -----------------------------------------------------------------------------
# Function-call and bracket forms of the same feature subset, compared with
# all.equal() and then inspected.
featSub0 <- subsetByRow(myMultiAssay, "ENST00000355076")
featSub1 <- myMultiAssay["ENST00000355076", , drop = FALSE] #equivalent
all.equal(featSub0, featSub1)
class(featSub1)
class(experiments(featSub1))
experiments(featSub1)

## -----------------------------------------------------------------------------
# Request two features in the reverse of their row order in arraydat, then
# print the first assay before and after subsetting.
featSubsetted <-
  subsetByRow(myMultiAssay, c("ENST00000355076", "ENST00000294241"))
assay(myMultiAssay, 1L)
assay(featSubsetted, 1L)

## -----------------------------------------------------------------------------
# Three fixed query ranges: two on chr1 (one per strand) and one on chr2.
gr <- GRanges(seqnames = c("chr1", "chr1", "chr2"), strand = c("-", "+", "+"),
              ranges = IRanges(start = c(230602, 443625, 934533),
                               end = c(330701, 443724, 934632)))

## -----------------------------------------------------------------------------
# Row subsetting with a GRanges query; maxgap and type are passed alongside it.
# NOTE(review): the row ranges of `rse` are drawn with runif()/sample() where
# it is built, so which features these fixed ranges select presumably varies
# between runs -- confirm.
subsetted <- subsetByRow(myMultiAssay, gr, maxgap = 2L, type = "within")
experiments(subsetted)
# Row ranges of the fourth experiment after subsetting.
rowRanges(subsetted[[4]])

## -----------------------------------------------------------------------------
# Print the names of the object, then extract one experiment by position and
# by name.
names(myMultiAssay)
myMultiAssay[[1]]
myMultiAssay[["Affy"]]

## -----------------------------------------------------------------------------
colData(myMultiAssay)

## -----------------------------------------------------------------------------
# complete.cases() on the whole object.
complete.cases(myMultiAssay)

## -----------------------------------------------------------------------------
# The same call restricted to the first two assays.
complete.cases(myMultiAssay[, , 1:2])

## -----------------------------------------------------------------------------
# Use the complete.cases() result as the column subscript.
myMultiAssay[, complete.cases(myMultiAssay), ]

## -----------------------------------------------------------------------------
replicated(myMultiAssay)

## -----------------------------------------------------------------------------
# intersectRows() on the first two assays; the outer parentheses print the
# assigned result.
(ensmblMatches <- intersectRows(myMultiAssay[, , 1:2]))
rownames(ensmblMatches)

## -----------------------------------------------------------------------------
intersectColumns(myMultiAssay)

## -----------------------------------------------------------------------------
# mergeReplicates() applied to the result of intersectColumns().
mergeReplicates(intersectColumns(myMultiAssay))

## -----------------------------------------------------------------------------
# Combine the object with a new 2 x 4 matrix named "ExpScores"; `mapFrom = 1L`
# is passed to c() along with it.
# `dimnames` is spelled out in full: the abbreviation `dim` only reaches
# matrix()'s `dimnames` parameter through partial argument matching.
c(myMultiAssay,
  ExpScores = matrix(1:8, ncol = 4,
                     dimnames = list(c("ENSMBL0001", "ENSMBL0002"),
                                     paste0("pt", 1:4))),
  mapFrom = 1L)

## -----------------------------------------------------------------------------
# getWithColData() on experiment 1; the outer parentheses print the assigned
# result, whose colData and class are then inspected.
(affex <- getWithColData(myMultiAssay, 1L))
colData(affex)
class(affex)

## -----------------------------------------------------------------------------
# longFormat() of the first two assays.
longFormat(myMultiAssay[, , 1:2])

## -----------------------------------------------------------------------------
# Same call, additionally passing "age" as colDataCols.
longFormat(myMultiAssay[, , 1:2], colDataCols="age")

## -----------------------------------------------------------------------------
maemerge <- mergeReplicates(intersectColumns(myMultiAssay))
# Only the first five columns of the wideFormat() result are shown.
wideFormat(maemerge, colDataCols="sex")[, 1:5]

## -----------------------------------------------------------------------------
# assay() (singular) called on the whole object.
assay(myMultiAssay)

## -----------------------------------------------------------------------------
# assays() (plural) called on the whole object.
assays(myMultiAssay)

## ---- eval = FALSE------------------------------------------------------------
#  BiocManager::install("curatedTCGAData")

## -----------------------------------------------------------------------------
# Row and column names of the whole object.
rownames(myMultiAssay)
colnames(myMultiAssay)

## -----------------------------------------------------------------------------
# Replace the second experiment (the methylation matrix) with a plain vector,
# presumably to demonstrate the constructor rejecting it; try() lets the
# script continue and writes any error message to stdout.
objlist2 <- objlist
objlist2[[2]] <- as.vector(objlist2[[2]])

try(MultiAssayExperiment(objlist2, patient.data, dfmap),
    outFile = stdout())

## -----------------------------------------------------------------------------
# List the methods available for the class.
methods(class="MultiAssayExperiment")

## -----------------------------------------------------------------------------
citation("MultiAssayExperiment")

## -----------------------------------------------------------------------------
# Record the R session details.
sessionInfo()

# Try the MultiAssayExperiment package in your browser

# Any scripts or data that you put into this service are public.

# MultiAssayExperiment documentation built on Nov. 8, 2020, 8:10 p.m.