Nothing
## ----options,include = FALSE--------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,comment = "#>",
echo = TRUE,cache = TRUE,
dev = "png")
# Fix seed
set.seed(12)
## ----setup,warning = FALSE----------------------------------------------------
req_packs = c("devtools","ggplot2","smarter","SMASH")
for(pack in req_packs){
library(package = pack,character.only = TRUE)
}
# List package's exported functions
ls("package:SMASH")
## -----------------------------------------------------------------------------
eS[[3]][[1]]
## -----------------------------------------------------------------------------
eS[[3]][[2]]
## ----echo = FALSE-------------------------------------------------------------
maxLOCI = 50
meanDP = 1e3
nCN = 3
## ----sim----------------------------------------------------------------------
sim = gen_subj_truth(
mat_eS = eS[[3]][[2]],
maxLOCI = maxLOCI,
nCN = nCN)
class(sim)
names(sim)
## -----------------------------------------------------------------------------
# Underlying true allocation, multiplicity, and cellular prevalence per point mutation
dim(sim$subj_truth)
sim$subj_truth[1:5,]
# Combinations of allelic copy number, allocation, and multiplicity
# with resulting mean VAFs (written as MAF) and cellular prevalences
uniq_states = unique(sim$subj_truth[,
c("CN_1","CN_2","true_A","true_M","true_MAF","true_CP")])
rownames(uniq_states) = NULL
round(uniq_states,3)
# Tumor purity
sim$purity
# Tumor subclone proportions
sim$eta
# Cancer subclone proportions
sim$eta / sim$purity
sim$q
## -----------------------------------------------------------------------------
mat_RD = gen_ITH_RD(DATA = sim$subj_truth,RD = meanDP)
dim(mat_RD); mat_RD[1:5,]
smart_hist(rowSums(mat_RD),breaks = 20,
xlab = "Total Depth",cex.lab = 1.5)
# Combine copy number and read count information
input = cbind(sim$subj_truth,mat_RD)
# Calculate observed VAF
input$VAF = input$tAD / rowSums(input[,c("tAD","tRD")])
# Remove rows with no alternate depth
input = input[which(input$tAD > 0),]
dim(input)
input[1:3,]
## -----------------------------------------------------------------------------
# All loci
smart_hist(input$VAF,breaks = 30,main = "All Loci",
xlab = "Observed VAF",cex.lab = 1.5)
abline(v = unique(input$true_MAF),lty = 2,lwd = 2,col = "magenta")
# Loci per copy number state
uCN = unique(input[,c("CN_1","CN_2")])
tmp_range = range(input$VAF)
for(ii in seq(nrow(uCN))){
# ii = 3
idx = which(input$CN_1 == uCN$CN_1[ii] & input$CN_2 == uCN$CN_2[ii])
smart_hist(input$VAF[idx],breaks = 20,xlim = tmp_range,
main = sprintf("Loci with (CN_1,CN_2) = (%s,%s)",uCN$CN_1[ii],uCN$CN_2[ii]),
xlab = "Observed VAF",cex.lab = 1.5)
abline(v = unique(input$true_MAF[idx]),lty = 2,lwd = 2,col = "magenta")
}
## ----know_config,fig.dim = c(5,5)---------------------------------------------
# Calculate true_unc_q, the unconstrained subclone proportions in cancer
true_unc_q = log(sim$q[-length(sim$q)] / sim$q[length(sim$q)])
true_unc_q
# Optimize
ith_out = ITH_optim(my_data = input,
my_purity = sim$purity,
pi_eps0 = 0,
my_unc_q = true_unc_q,
init_eS = eS[[3]][[2]])
# Estimate of unclassified loci
ith_out$pi_eps
# Model fit BIC
ith_out$BIC
# Estimate of subclone proportions in cancer
nSC = length(ith_out$q)
tmp_df = smart_df(SC = as.character(rep(seq(nSC),2)),
CLASS = c(rep("Truth",nSC),rep("Estimate",nSC)),
VALUE = c(sim$q,ith_out$q))
tmp_df
ggplot(data = tmp_df,mapping = aes(x = SC,y = VALUE,fill = CLASS)) +
geom_bar(width = 0.5,stat = "identity",position = position_dodge()) +
xlab("Subclone") + ylab("Proportion in Cancer") +
theme(legend.position = "bottom",
text = element_text(size = 20))
# Compare truth vs estimated/inferred
## Variant Allele Frequency
smoothScatter(input$true_MAF,ith_out$infer$infer_MAF,
main = "VAF",xlab = "Truth",ylab = "Inferred",cex.lab = 1.5)
abline(a = 0,b = 1,lwd = 2,lty = 2,col = "red")
smoothScatter(input$VAF,ith_out$infer$infer_MAF,
main = "VAF",xlab = "Observed",ylab = "Inferred",cex.lab = 1.5)
abline(a = 0,b = 1,lwd = 2,lty = 2,col = "red")
## Cellular prevalence
smoothScatter(input$true_CP,
ith_out$infer$infer_CP,main = "Cellular Prevalence",
xlim = c(0,1),ylim = c(0,1),
xlab = "Truth",ylab = "Estimate",cex.lab = 1.5)
abline(a = 0,b = 1,lwd = 2,lty = 2,col = "red")
## Allocation
smoothScatter(input$true_A,ith_out$infer$infer_A,
main = "Allocation",xlab = "Truth",ylab = "Inferred",
cex.lab = 1.5)
abline(a = 0,b = 1,lwd = 2,lty = 2,col = "red")
if( any(is.na(ith_out$infer$infer_A)) )
cat("Some loci couldn't classify\n")
## Multiplicity
smart_table(Truth = input$true_M,
Inferred = ith_out$infer$infer_M)
## ----unknown_opt,R.options = list(width = 200),fig.dim = c(8,6)---------------------------------------------------------------------------------------------------------------------------------------
grid_ith = grid_ITH_optim(
my_data = input,
my_purity = sim$purity,
list_eS = eS,
trials = 50,
max_iter = 4e3)
names(grid_ith)
# Grid of solutions
grid_ith$GRID
# Order solution(s) based on BIC (larger BIC correspond to better fits)
gg = grid_ith$GRID
gg = gg[order(-gg$BIC),]
head(gg)
# true cancer proportions
sim$q
# true entropy
-sum(sim$q * log(sim$q))
## ----post,fig.dim = c(8,5)----------------------------------------------------
pp = vis_GRID(GRID = grid_ith$GRID)
print(pp)
## -----------------------------------------------------------------------------
gg = grid_ith$GRID
idx = which(gg$BIC == max(gg$BIC))
gg[idx,]
gg$cc[idx][1]
## -----------------------------------------------------------------------------
opt_entropy = gg$entropy[idx]
names(opt_entropy) = sprintf("Solution %s",idx)
opt_entropy
## -----------------------------------------------------------------------------
props = list()
for(jj in seq(length(idx))){
opt_prop = gg$q[idx[jj]]
opt_prop = as.numeric(strsplit(opt_prop,",")[[1]])
names(opt_prop) = sprintf("SC%s",seq(length(opt_prop)))
props[[sprintf("Solution %s",idx[jj])]] = opt_prop
}
props
## -----------------------------------------------------------------------------
for(jj in seq(length(idx))){
cat(sprintf("Solution %s\n",idx[jj]))
print(head(grid_ith$INFER[[idx[jj]]]))
}
## -----------------------------------------------------------------------------
opt_cps = list()
for(jj in seq(length(idx))){
# jj = 1
tab = table(smart_digits(grid_ith$INFER[[idx[jj]]]$infer_CP,4),
grid_ith$INFER[[idx[jj]]]$infer_A)
rownames(tab) = sprintf("CP = %s",rownames(tab))
colnames(tab) = sprintf("ALLOC = %s",colnames(tab))
opt_cps[[sprintf("Solution %s",idx[jj])]] = tab
}
opt_cps
## -----------------------------------------------------------------------------
opt = list()
for(jj in seq(length(idx))){
# jj = 1
opt_cc = gg$cc[idx[jj]]
opt_kk = gg$kk[idx[jj]]
mat = eS[[opt_cc]][[opt_kk]]
dimnames(mat) = list(sprintf("ALLOC = %s",seq(opt_cc)),sprintf("SC%s",seq(opt_cc)))
opt[[sprintf("Solution %s",idx[jj])]] = mat
}
opt
## ----sessInfo-----------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.