#' Create a mixture of subclones
#'
#' @param subClones A list of K subclones, as provided by [buildSubclones()].
#' Each subclone is a data.frame with the following columns: \describe{
#'
#' \item{ct}{total copy number in the tumor}
#' \item{baft}{B allele fraction in the tumor}
#' \item{genotype}{germline genotype in 0, 0.5, 1}
#' \item{cn}{total copy number in the matched normal}
#' \item{bafn}{B allele fraction in the matched normal}}
#'
#' @param W A n x K matrix of weights in \eqn{[0,1]}, as generated by
#' [rSparseWeightMatrix()].
#'
#' @return A list of n samples obtained by mixing the pure subclones according
#' to the proportions specified in argument W.
#'
#' @details The mixture is performed at the scale of minor and major copy
#' numbers (which is the scale at which copy number events occur). Therefore
#' the data is first mapped to this scale, then mixed, then mapped back to the
#' original (tcn, dh) scale.
#'
#' By construction, dh, minor and major CN are only defined for heterozygous
#' SNPs. Therefore, in the output data, the corresponding columns are missing
#' for homozygous SNPs. Total copy numbers are *not* missing, so it makes
#' sense to keep the homozygous SNPs in the output data as they will be
#' exploited at the segmentation step.
#'
#' @examples
#' dataAnnotTP <- acnr::loadCnRegionData(dataSet="GSE11976", tumorFrac=1)
#' dataAnnotN <- acnr::loadCnRegionData(dataSet="GSE11976", tumorFrac=0)
#' len <- 500*10 ## Number of loci
#' K <- 2L ## Number of subclones
#' n <- 3L ## Number of samples
#' bkps <- list(c(100, 250)*10, c(150, 400)*10)
#' regions <- list(c("(0,3)", "(0,2)", "(1,2)"), c("(1,1)", "(0,1)", "(1,1)"))
#' datSubClone <- buildSubclones(len=len, nbClones=K, bkps=bkps, regions=regions,
#' dataAnnotTP=dataAnnotTP, dataAnnotN=dataAnnotN)
#' W <- rSparseWeightMatrix(nb.samp=n, nb.arch=K)
#' mixture <- mixSubclones(datSubClone, W=W)
#'
#' @export
mixSubclones <- function(subClones, W) {
## sanity checks: dimensions
stop_if_not(length(dim(W)) == 2L)
stop_if_not(length(subClones) == ncol(W))
## sanity check: column names
expectedNames <- c("ct", "baft", "genotype", "cn", "bafn", "pos")
lapply(subClones, FUN = function(dat){
mm <- match(expectedNames, colnames(dat))
if (any(is.na(mm))) {
lost <- paste(expectedNames[which(is.na(mm))], collapse = ", ")
stop("Missing column(s) in argument 'subClones': ", lost)
}
})
## sanity check: weights
if (any(W < 0)) {
stop("Expecting non-negative weight matrix")
}
if (any(rowSums(W) > 1L)) {
stop("Total tumor fraction larger than 1, please make sure that the row sums of 'W' are at most 1")
}
idxHom <- which(subClones[[1]]$genotype != 0.5)
sc <- sapply(seq_len(length(subClones)-1), FUN = function(i) {
# Test if genotype of i is equal to genotype of j
genoI <- subClones[[i]]$genotype
for (j in (i+1):length(subClones)) {
genoJ <- subClones[[j]]$genotype
if (sum(genoI == genoJ) != length(genoI)) {
stop(sprintf("Genotypes are not identical for sublones %s and %s", i, j))
}
}
})
## compute parental copy numbers
c1t <- sapply(subClones, FUN=function(ss) {
dh <- 2*abs(ss$baft-1/2)
c <- ss$ct*(1-dh)/2
})
c1n <- sapply(subClones, FUN=function(ss) {
dh <- 2*abs(ss$bafn-1/2)
c <- ss$cn*(1-dh)/2
})
c2t <- sapply(subClones, FUN=function(ss) {
dh <- 2*abs(ss$baft-1/2)
c <- ss$ct*(1+dh)/2
})
c2n <- sapply(subClones, FUN=function(ss) {
dh <- 2*abs(ss$bafn-1/2)
c <- ss$cn*(1+dh)/2
})
df.res <- apply(W, MARGIN=1L, FUN=function(weights) {
fracN <- 1 - sum(weights)
stop_if_not(fracN >= 0) ## should have been caught earlier by above sanity checks
c1 <- rowSums(cbind(sapply(seq_along(subClones), FUN=function(ii) {
weights[ii]*c1t[, ii]
}), fracN*rowMeans(c1n)))
c2 <- rowSums(cbind(sapply(seq_along(subClones), FUN=function(ii) {
weights[ii]*c2t[, ii]
}), fracN*rowMeans(c2n)))
tcn <- c1+c2
c1[idxHom] <- NA_real_
c1[idxHom] <- NA_real_
c2[idxHom] <- NA_real_
c2[idxHom] <- NA_real_
dh <- (c2-c1)/(c2+c1)
dh[idxHom] <- NA_real_
data.frame(c1=c1, c2=c2, tcn=tcn, dh=dh,
genotype=subClones[[1]]$genotype,
chr=rep(1, times=length(c1)),
pos=seq_along(c1))
})
df.res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.