# fitting copy number and cellular fractions (replaces clusteredcncf)
fitcncf <- function(out, dipLogR=0, nX=23) {
#' Copy number and cellular fraction of segment clusters
#' @param out the out element from procSample fit
#' @param dipLogR log-ratio level corresponding to diploid copy number
#' @param nX integer value of ChrX; humans 23 (default); mouse 20.
#' @details This function is called internally by procSample. It can be used to refit the model at a different dipLogR value.
#' @return A data frame with the same columns as out and three additional columns which give the cellular fraction (cf), total copy number (tcn) and lower copy number (lcn) for each segment. If a cluster does not have sufficient heterozygotes the lcn value will be NA.
# save the original out
cncf <- out
# get the segclust version of out
num.mark <- tapply(out$num.mark, out$segclust, sum)
segclust <- as.numeric(names(num.mark))
nhet <- tapply(out$nhet, out$segclust, sum)
cnlr.median <- tapply(out$cnlr.median.clust, out$segclust, median)
mafR <- tapply(out$mafR.clust, out$segclust, median)
out0 <- data.frame(segclust, num.mark, nhet, cnlr.median, mafR)
# fit the copy numbers and cellular fraction
out1 <- fitcncf0(out0, dipLogR)
ii <- match(cncf$segclust, out1$segclust)
cncf$cf <- out1$cf[ii]
cncf$tcn <- out1$tcn[ii]
cncf$lcn <- out1$lcn[ii]
# revise the copy numbers for X if subject is male
ii <- which(cncf$chrom==nX)
if (length(ii) > 0) {
# male if nhets on X chromosome is less than 1% of num.mark
if (sum(cncf$nhet[ii])/sum(cncf$num.mark[ii]) < 0.01) {
cncf$tcn[ii] <- ceiling(cncf$tcn[ii]/2)
cncf$lcn[ii] <- 0
# set cf to be 1 if tcn==1
cncf$cf[ii][cncf$tcn[ii]==1] <- 1
}
}
# if tcn is 0 or 1 lcn has to be zero
cncf$lcn[cncf$tcn<=1] <- 0
# return result
cncf
}
fitcncf0 <- function(out, dipLogR=0) {
# initialize vectors
out$lcn <- out$tcn <- out$cf <- out$ocn <- rep(NA_real_, nrow(out))
out$ocn <- 2^(1 + out$cnlr.median - dipLogR)
ocn0 <- out$ocn
maf0 <- 1/(1+exp(sqrt(pmax(0,out$mafR))))
# loop through the segment clusters
for(seg in 1:nrow(out)) {
ocn <- ocn0[seg]
maf <- maf0[seg]
if (is.finite(maf)) {
if (ocn > 2.15) {
tcn <- ceiling(ocn)
uu <- optcfutil(tcn, ocn, maf)
# it distance measure is large - 0.0225 when diff CN of 0.15
# make sure the fitted cf is not >0.99
if (uu[3] > 0.0225 | uu[2]>0.99) {
tcn1 <- tcn+1
uu1 <- optcfutil(tcn1, ocn, maf)
# if distance measure is reduced by at least 20%
if (uu1[3] < 0.8*uu[3]) {
# if distance is still large - 0.01 when diff CN of 0.1
if (uu1[3] > 0.01) {
tcn2 <- tcn+2
uu2 <- optcfutil(tcn2, ocn, maf)
if (uu2[3] < 0.8*uu1[3]) {
uu1 <- uu2
tcn1 <- tcn2
}
}
uu <- uu1
tcn <- tcn1
}
}
out$tcn[seg] <- tcn
out$lcn[seg] <- uu[1]
out$cf[seg] <- uu[2]
} else {
if (ocn < 1.85) {
# choose between 0 & 1
uu0 <- optcfutil(0, ocn, maf)
uu1 <- optcfutil(1, ocn, maf)
if (uu0[3] < uu1[3]) {
out$tcn[seg] <- 0
out$lcn[seg] <- uu0[1]
out$cf[seg] <- uu0[2]
} else {
out$tcn[seg] <- 1
out$lcn[seg] <- uu1[1]
out$cf[seg] <- uu1[2]
}
} else {
# ocn between 1.85 & 2.15; choose between 1+1 & 2+0
uu2 <- optcfutil(2, ocn, maf)
# if optimal cf is > 15% call it 2+0 o.w 1+1
out$tcn[seg] <- 2
if (uu2[2] > 0.15) {
out$lcn[seg] <- uu2[1]
out$cf[seg] <- uu2[2]
} else {
out$lcn[seg] <- 1
out$cf[seg] <- 1
}
}
}
}
}
# merge thc cf levels
out <- mergecf(out)
# now fill in the tcn for clusters with NA for mafR using 3rd quartile cf
ii <- which(out$cf < 1)
# don't want to call deletions and amplications in low purity samples
if (length(ii) > 0) {
cf3q <- max(quantile(rep(out$cf[ii], out$num.mark[ii]), 0.75), 0.3)
} else {
cf3q <- 0.3
}
for(seg in 1:nrow(out)) {
if (is.na(maf0[seg])) {
ocn <- ocn0[seg]
# divide ocn by cf3q and round it
tcn <- round((ocn-2)/cf3q) + 2
if (tcn < 0) tcn <- 0
out$tcn[seg] <- tcn
# set the segment cf to be cf3q
out$cf[seg] <- cf3q
}
}
out
}
# minimizes the distance(1+l*cf - ocn1)^2 + (1+k*cf - ocn2)^2 to get "cf"
# where ocn1 = ocn*maf, ocn2=ocn*(1-maf), l <= k true copy numbers in tumor
optcfutil <- function(tcn, ocn, maf) {
cn1 <- ocn*maf
cn2 <- ocn*(1-maf)
maxlcn <- floor(tcn/2)
oo <- sapply(0:maxlcn, function(l, tcn, cn1, cn2) {
k <- tcn - l
if (k==1 & l==1) {
cf = 1
} else {
cf = ((l-1)*(cn1-1) + (k-1)*(cn2-1))/((l-1)^2+(k-1)^2)
}
# if the optimal cf outside (0,1) set it at boundary
if (cf < 0) cf <- 0
if (cf > 1) cf <- 1
# calculate utility
util <- (1+(l-1)*cf - cn1)^2 + (1+(k-1)*cf - cn2)^2
c(l,cf,util)
}, tcn, cn1, cn2)
oo[,which.min(oo[3,])]
}
# merging cellular fractions
mergecf <- function(out) {
# clusters with defined cf and less than 1
out0 <- out[which(out$cf < 1),]
if (nrow(out0) > 0) {
# order out0 by cf
out0 <- out0[order(out0$cf),]
# prepare the variables needed for deviance
maf <- 1/(1+exp(sqrt(pmax(out0$mafR,0))))
acn1o <- out0$ocn*maf
acn2o <- out0$ocn*(1-maf)
acn1t <- out0$lcn - 1
acn2t <- out0$tcn-out0$lcn -1
cf <- out0$cf
nm <- out0$num.mark
# merge cf values closest to one another
cfclust0 <- cfclust <- 1:nrow(out0)
cflevels0 <- cflevels <- cf
dev1 <- dev0 <- sum(((1+cf*acn1t - acn1o)^2 + (1+cf*acn2t - acn2o)^2)*nm)
while (length(cflevels)>1 & dev1 <= 1.1*dev0) {
# save the previous levels
cflevels0 <- cflevels
cfclust0 <- cfclust
# dev1a <- dev1
# update
j <- which.min(diff(cflevels))
cflevels <- cflevels[-j]
cfclust[cfclust>j] <- cfclust[cfclust>j] - 1
cflevels[j] <- sum((nm*cf)[cfclust==j])/sum(nm[cfclust==j])
dev1 <- sum(((1+cflevels[cfclust]*acn1t - acn1o)^2 + (1+cflevels[cfclust]*acn2t - acn2o)^2)*nm)
}
# replace the existing cf with merged cf
ii <- match(out0$segclust, out$segclust)
out$cf[ii] <- cflevels0[cfclust0]
}
out
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.