Installation

devtools::install_github("keyuan/ccube")
# wch's setting 
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library(ccube)

library(dplyr)
#library(doParallel)
options(stringsAsFactors = F)

Generate a synthetic data set

First, we generate a toy dataset with 500 mutations

set.seed(1234)
numSnv <- 500
ccfSet <- c(1, 0.7, 0.3) # true ccf pool
ccfTrue <- sample(ccfSet, numSnv, c(0.5,0.2,0.3), replace = T) # simulate true clusters
purity <- 0.8
cnPoolMaj <- c(1,2,3,4) # a pool of possible major copy numbers
cnPoolMin <- c(0,1,2) # a pool of possible minor copy numbers
cnPoolMajFractions <- c(1/4, 1/4, 1/4, 1/4) # prevalence of possible major copy numbers
cnPoolMinFractions <- c(1/3, 1/3, 1/3) # prevalence of possible minor copy numbers

cnProfile = GenerateCopyNumberProfile(cnPoolMaj, cnPoolMin, 
                                      cnPoolMajFractions, cnPoolMinFractions, numSnv)

head(cnProfile) # column 1: minor copy number, column 2: major copy number, column 3: total copy number 

Simulate cancer cell fractions, multiplicity, and reads counts

baseDepth = 40
mydata <- data.frame(mutation_id = paste0("ss","_", seq_len(numSnv)) ,
                     ccf_true = ccfTrue,
                     minor_cn = cnProfile[,1],
                     major_cn = cnProfile[,2],
                     total_cn = cnProfile[,3], 
                     purity = purity,
                     normal_cn = 2)

mydata <- dplyr::mutate(rowwise(mydata),
                        mult_true = sample(seq(1,if (major_cn ==1) { 1 } else {major_cn}), 1), # simulate multiplicity
                        vaf_true = cp2ap(ccf_true, purity, normal_cn, total_cn, total_cn, mult_true), # simulate vaf
                        total_counts = rpois(1, total_cn/2 * baseDepth), # simulate total read counts
                        var_counts = rbinom(1, total_counts, vaf_true),  # simulate variant read counts
                        ref_counts = total_counts - var_counts)

head(mydata)

Run Ccube pipeline

numOfClusterPool = 1:6
numOfRepeat = 1
results <- RunCcubePipeline(ssm = mydata, 
                            numOfClusterPool = numOfClusterPool, 
                            numOfRepeat = numOfRepeat,
                            runAnalysis = T, 
                            runQC = T)

The results list contains four variables:

Finally, we make a default plot of Ccube results

MakeCcubeStdPlot(ssm = results$ssm, res = results$res, printPlot = F)

Estimate purity

Ccube can make its own purity estimation from mutations in normal copy number regions.

est_purity = GetPurity(mydata = mydata)
est_purity


keyuan/ccube documentation built on Jan. 11, 2023, 12:01 a.m.