devtools::install_github("keyuan/ccube")
# wch's setting knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(ccube) library(dplyr) #library(doParallel) options(stringsAsFactors = F)
First, we generate a toy dataset with 500 mutations
set.seed(1234) numSnv <- 500 ccfSet <- c(1, 0.7, 0.3) # true ccf pool ccfTrue <- sample(ccfSet, numSnv, c(0.5,0.2,0.3), replace = T) # simulate true clusters purity <- 0.8 cnPoolMaj <- c(1,2,3,4) # a pool of possible major copy numbers cnPoolMin <- c(0,1,2) # a pool of possible minor copy numbers cnPoolMajFractions <- c(1/4, 1/4, 1/4, 1/4) # prevalence of possible major copy numbers cnPoolMinFractions <- c(1/3, 1/3, 1/3) # prevalence of possible minor copy numbers cnProfile = GenerateCopyNumberProfile(cnPoolMaj, cnPoolMin, cnPoolMajFractions, cnPoolMinFractions, numSnv) head(cnProfile) # column 1: minor copy number, column 2: major copy number, column 3: total copy number
Simulate cancer cell fractions, multiplicity, and reads counts
baseDepth = 40 mydata <- data.frame(mutation_id = paste0("ss","_", seq_len(numSnv)) , ccf_true = ccfTrue, minor_cn = cnProfile[,1], major_cn = cnProfile[,2], total_cn = cnProfile[,3], purity = purity, normal_cn = 2) mydata <- dplyr::mutate(rowwise(mydata), mult_true = sample(seq(1,if (major_cn ==1) { 1 } else {major_cn}), 1), # simulate multiplicity vaf_true = cp2ap(ccf_true, purity, normal_cn, total_cn, total_cn, mult_true), # simulate vaf total_counts = rpois(1, total_cn/2 * baseDepth), # simulate total read counts var_counts = rbinom(1, total_counts, vaf_true), # simulate variant read counts ref_counts = total_counts - var_counts) head(mydata)
numOfClusterPool = 1:6 numOfRepeat = 1 results <- RunCcubePipeline(ssm = mydata, numOfClusterPool = numOfClusterPool, numOfRepeat = numOfRepeat, runAnalysis = T, runQC = T)
The results
list contains four variables:
res
: Optimal model solutionlabel
: Most likely cluster assignmentsfull.model
: All Ccube model parametersresponsibility
: Cluster assigment probabilitiesccfMean
: Cluster means, $\mu_k$ccfCov
: Cluster variance $\sigma^2_k$bv
: Multiplicity estimatesEpi
: Mean of dirchlet posteriordirichletConcentration
: Concentration parameter of dirchlet posteriordirichletConcentration0
: Concentration parameter of dirchlet priornormalMean
: Mean of normal priorinvWhishartScale
: Variance of normal priorL
: ELBO trace ssm
: A data frame of processed mutations with additional annotations ccube_ccf_mean
: CCF cluster meanccube_mult
: Ccube multiplicity estimatesccube_ccf
: Event CCF, i.e. CCF estimates for individual SNV results
: A list all fitted models. Each element is structured the same as res
lb
: Best ELBO across fitted modelsdroppedSsm
: Dropped variants (i.e. variants in regions where major copy number is zero) Finally, we make a default plot of Ccube results
MakeCcubeStdPlot(ssm = results$ssm, res = results$res, printPlot = F)
Ccube can make its own purity estimation from mutations in normal copy number regions.
est_purity = GetPurity(mydata = mydata) est_purity
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.