inst/doc/ACE_vignette.R

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ---- echo=FALSE, message=FALSE, warning=FALSE--------------------------------
library(ACE)
library(Biobase)
library(QDNAseq)
library(ggplot2)

## ---- eval = FALSE------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  # BiocManager::install()
#  BiocManager::install("ACE")

## ---- eval = FALSE------------------------------------------------------------
#  # install.packages("devtools")
#  devtools::install_github("tgac-vumc/ACE")

## ---- eval = FALSE------------------------------------------------------------
#  # specify the directory containing your bam-files
#  userpath <- tempdir()
#  # if you do not want the output in the same directory, use argument outputdir
#  runACE(userpath, filetype='bam', binsizes = c(100, 1000),
#         ploidies = c(2,4), imagetype='png')

## ---- eval = FALSE------------------------------------------------------------
#  data("copyNumbersSegmented")
#  # specify the directory in which to create the output
#  userpath <- tempdir()
#  saveRDS(file.path(userpath,"copyNumbersSegmented.rds"))
#  runACE(userpath, filetype='rds', ploidies = c(2,4), imagetype='png')

## -----------------------------------------------------------------------------
data("copyNumbersSegmented")
object <- copyNumbersSegmented
model1 <- singlemodel(object, QDNAseqobjectsample = 1)
bestfit1 <- model1$minima[tail(which(model1$rerror==min(model1$rerror)), 1)]
besterror1 <- min(model1$rerror)
lastfit1 <- tail(model1$minima, 1)
lasterror1 <- tail(model1$rerror, 1)
singleplot(object, QDNAseqobjectsample = 1, cellularity = bestfit1, 
           error = besterror1, standard = model1$standard, 
           title = "sample1 - binsize 1000 kbp - 379776 reads - 2N fit 1")
singleplot(object, QDNAseqobjectsample = 1, cellularity = lastfit1, 
           error = lasterror1, standard = model1$standard, 
           title = "sample1 - binsize 1000 kbp - 379776 reads - 2N fit 7")
model1$errorplot + ggtitle("sample1 - errorlist") + 
  theme(plot.title = element_text(hjust = 0.5))
model2 <- singlemodel(object, QDNAseqobjectsample = 2)
singleplot(object, QDNAseqobjectsample = 2, 0.38, 0.183, 
           title = "sample2 - binsize 1000 kbp - 369331 reads - 2N fit 3")
singleplot(object, QDNAseqobjectsample = 2, 0.94, 0.998, 
           title = "sample2 - binsize 1000 kbp - 369331 reads - 2N fit 6")
model2$errorplot +	ggtitle("sample2 - errorlist") + 
  theme(plot.title = element_text(hjust = 0.5))

## -----------------------------------------------------------------------------
data("copyNumbersSegmented")
object <- copyNumbersSegmented
# Since you're convinced the mode is 3N, you can run the singlemodel function to
# fit at ploidy = 3
model <- singlemodel(object, QDNAseqobjectsample = 2, ploidy = 3)
ls(model)
model$minima
model$rerror
model$errorplot
# Examining the errors can give you a feel for the fits. Experience tells
# us the last fit is probably the right one, so let's check out the copy number 
# plot. Specify the same parameters, now including the cellularity derived from 
# the model.
singleplot(object, QDNAseqobjectsample = 2, cellularity = 0.46, ploidy = 3)
# That is actually a very nice fit, let's run with it!
# You can now save the plot however you like. 

## -----------------------------------------------------------------------------
# To use data from QDNAseq-objects, ACE parses it into data frames referred to
# as "templates". Because we will look at sample2 several times, we can just
# create a variable with this data frame.
template <- objectsampletotemplate(object, index = 2)
head(template)
head(na.exclude(template))
# The template has the raw data from QDNAseq
median(na.exclude(template$segments))
# That number looks familiar ... but suppose I am not happy with it?
# You could find the values of all segments by doing
unique(na.exclude(template$segments))
# Personally I like the rle function, because it also shows you the "length" of
# a segment
segmentdata <- rle(as.vector(na.exclude(template$segments)))
plot(rep(segmentdata$values, segmentdata$lengths))
# Yes, it can be that easy to make something resembling a copy number plot :-) 
# Let's say we are sure that the segment with value 0.8105095 is 2N, we can use
# that number as new standard. First we need to find a good model that fits with
# this hypothesis
model <- singlemodel(template, ploidy = 2, standard = 0.8105095)
data.frame(model$minima, model$rerror)
singleplot(template, cellularity = 0.46, ploidy = 2, standard = 0.8105095)
# Choosing a lower standard shifts the absolute copy numbers up
# As a result we ended up with the exact same model as the 3N fit
# Note that I can directly use the template for the singlemodel and singleplot functions

## -----------------------------------------------------------------------------
# Let's continue with the template we made for sample 2, and just see what happens...
# Since the output of this one is pretty big, I'm saving it to a variable
sqmodel <- squaremodel(template)
ls(sqmodel)
# Yes, you get a lot of bang for your buck. You get back the parameters it used,
# but also the errors of all combinations tested in both a matrix and a
# dataframe, and where minima are found. But the fun comes in form of the
# matrixplot.
sqmodel$matrixplot + ggtitle("Sky on fire")
# You can find your minima of interest in the minimadf
# Note that the minima are sorted by their relative error
head(sqmodel$minimadf, 10)
# Guess I was warned about this ... squaremodel is a bit fithappy; time to put
# the thumbscrews on the fit
squaremodel(template, penalty = 0.5, penploidy = 0.5)$matrixplot
# Much better. Additionally you can play around with the range and resolution
sqmodel <- squaremodel(template, prows = 150, ptop = 3.3, pbottom = 1.8, 
                       penalty = 0.5, penploidy = 0.5)
sqmodel$matrixplot
# Going for higher ploidy resolution is not recommended! Not only will it take
# much longer to run, it will start distinguishing minima that correspond with
# basically the same model.
head(sqmodel$minimadf, 10)
singleplot(template, cellularity = 0.41, ploidy = 2.08)
# Perhaps this fit is a little bit better than our original 2N fit, 
# but the difference is negligible.

## -----------------------------------------------------------------------------
# Let's use the template dataframe we already created in the previous section
# for sample2. For cellularity and ploidy I'll use the original 2N fit
segmentdf <- getadjustedsegments(template, cellularity = 0.38)
head(segmentdf)

## -----------------------------------------------------------------------------
# Luck has it, we actually have mutation data for these samples. Bad luck has
# it, quality was very low, so calculated mutant copies are not very precise. 
# We use the segment data frame created in the previous section. 
# Mutation data can be provided as a file or as a data frame. The result will be 
# printed to file or returned as a supplemented data frame respectively.
# I will create the mutation data frame manually here.
Gene <- c("CASP8", "CDKN2A", "TP53")
Chromosome <- c(2, 9, 17)
Position <- c(202149589, 21971186, 7574003)
Frequency <- c(47.46, 36.28, 43.48)
variantdf <- data.frame(Gene, Chromosome, Position, Frequency)
linkvariants(variantdf, segmentdf, cellularity = 0.38, 
                 chrindex = 2, posindex = 3, freqindex = 4)

## -----------------------------------------------------------------------------
analyzegenomiclocations(segmentdf, Chromosome = 1, Position = 26365569)

## -----------------------------------------------------------------------------
chr <- c(1,2,3)
pos <- c(2000000,4000000,6000000)
analyzegenomiclocations(segmentdf, Chromosome = chr, Position = pos)

## -----------------------------------------------------------------------------
freq <- c(38,19,0)
analyzegenomiclocations(segmentdf=segmentdf, cellularity = 0.38, 
                        Chromosome = chr, Position = pos, Frequency = freq)

## ---- eval = FALSE------------------------------------------------------------
#  # Set the correct path
#  userpath <- tempdir()
#  # This function needs a models-file, which should like like this
#  sample <- c("sample1", "sample2")
#  cellularity <- c(0.79, 0.38)
#  ploidy <- c(2, 2)
#  standard <- c(1, 1)
#  models <- data.frame(sample, cellularity, ploidy, standard)
#  write.table(models, file.path(userpath, "models.tsv"), quote = FALSE,
#              sep = "\t", na = "", row.names = FALSE)
#  # Let's make sure we have some mutation data to analyze
#  # For simplicity, I will just use the same mutation data for both samples
#  write.table(mutationdf, file.path(userpath, "sample1_mutations.tsv"),
#              quote = FALSE, sep = "\t", na = "", row.names = FALSE)
#  write.table(mutationdf, file.path(userpath, "sample2_mutations.tsv"),
#              quote = FALSE, sep = "\t", na = "", row.names = FALSE)
#  # Let's go!
#  postanalysisloop(object, inputdir = userpath, postfix = "_mutations",
#                   chrindex = 2, posindex = 3, freqindex = 4,
#                   outputdir = file.path(userpath,"output_loop"), imagetype = 'png')
#  # note that mutation data is optional!

## -----------------------------------------------------------------------------
ACEcall(object, 2, cellularity = 0.39)$calledplot

## -----------------------------------------------------------------------------
# I don't think these two samples are very much related
tsc <- twosamplecompare(object, index1 = 1, index2 = 2, 
                        cellularity1 = 0.79, cellularity2 = 0.39)
tsc$compareplot

## -----------------------------------------------------------------------------
# Let's see some more fits for sample1!
sms <- squaremodelsummary(object, QDNAseqobjectsample = 1, 
                          squaremodel(object, 1, penalty = 0.5, penploidy = 0.5),
                          printplots = FALSE)
sms[[3]]

## -----------------------------------------------------------------------------
# I like squaremodels
lsm <- loopsquaremodel(object, printplots = FALSE, printobjectsummary = FALSE, 
                       penalty = 0.5, penploidy = 0.5, returnmodels = TRUE)
class(lsm)
length(lsm)
class(lsm[[1]])
ls(lsm[[1]])
lsm[[1]]$samplename
lsm[[1]]$matrixplot

## -----------------------------------------------------------------------------
sessionInfo()

Try the ACE package in your browser

Any scripts or data that you put into this service are public.

ACE documentation built on Nov. 8, 2020, 5:30 p.m.