# License Part ------------------------------------------------------------
# MIT License
#
# Copyright (c) [2018] [Geoffrey Macintyre]
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Code Part ---------------------------------------------------------------
fitComponent <-
function(dat,
dist = "norm",
seed = 77777,
model_selection = "BIC",
min_prior = 0.001,
niter = 1000,
nrep = 1,
min_comp = 2,
max_comp = 10)
{
control <- new("FLXcontrol")
control@minprior <- min_prior
control@iter.max <- niter
set.seed(seed)
if (dist == "norm")
{
if (min_comp == max_comp)
{
fit <-
flexmix::flexmix(
dat ~ 1,
model = flexmix::FLXMCnorm1(),
k = min_comp,
control = control
)
} else{
fit <-
flexmix::stepFlexmix(
dat ~ 1,
model = flexmix::FLXMCnorm1(),
k = min_comp:max_comp,
nrep = nrep,
control = control
)
if (inherits(fit, "stepFlexmix")) {
fit <- flexmix::getModel(fit, which = model_selection)
}
}
} else if (dist == "pois")
{
if (min_comp == max_comp)
{
fit <-
flexmix::flexmix(
dat ~ 1,
model = flexmix::FLXMCmvpois(),
k = min_comp,
control = control
)
} else{
fit <-
flexmix::stepFlexmix(
dat ~ 1,
model = flexmix::FLXMCmvpois(),
k = min_comp:max_comp,
nrep = nrep,
control = control
)
if (inherits(fit, "stepFlexmix")) {
fit <- flexmix::getModel(fit, which = model_selection)
}
}
}
fit
}
calculateSumOfPosteriors <-
function(CN_feature,
components,
name,
rowIter = 1000,
cores = 1)
{
if (cores > 1) {
# require(foreach)
# require(doMC)
requireNamespace("foreach", quietly = T)
#requireNamespace("doMC", quietly = T)
requireNamespace("doParallel", quietly = T)
len = dim(CN_feature)[1]
iters = floor(len / rowIter)
lastiter = iters[length(iters)]
#registerDoMC(cores)
registerDoParallel(cores = cores)
curr_posterior = foreach(i = 0:iters, .combine = rbind) %dopar% {
start = i * rowIter + 1
if (i != lastiter) {
end = (i + 1) * rowIter
} else {
end = len
}
flexmix::posterior(components, data.frame(dat = as.numeric(CN_feature[start:end, 2])))
}
} else {
curr_posterior <-
flexmix::posterior(components, data.frame(dat = as.numeric(CN_feature[, 2])))
}
mat <- cbind(CN_feature, curr_posterior)
posterior_sum <- c()
## foreach and parallelising doesn't make the following code faster.
for (i in unique(mat$ID))
{
posterior_sum <-
rbind(posterior_sum, colSums(mat[mat$ID == i, c(-1, -2)]))
}
params <- flexmix::parameters(components)
if (!is.null(nrow(params)))
{
posterior_sum <- posterior_sum[, order(params[1,])]
}
else
{
posterior_sum <- posterior_sum[, order(params)]
}
colnames(posterior_sum) <-
paste0(name, 1:ncol(posterior_sum))
rownames(posterior_sum) <- rownames(unique(mat$ID))
posterior_sum
}
getSegsize <- function(abs_profiles)
{
out <- c()
samps <- getSampNames(abs_profiles)
for (i in samps)
{
if (class(abs_profiles) == "QDNAseqCopyNumbers")
{
segTab <-
getSegTable(abs_profiles[, which(colnames(abs_profiles) == i)])
}
else
{
segTab <- abs_profiles[[i]]
colnames(segTab)[4] <- "segVal"
}
segTab$segVal[as.numeric(segTab$segVal) < 0] <- 0
seglen <-
(as.numeric(segTab$end) - as.numeric(segTab$start))
seglen <- seglen[seglen > 0]
out <-
rbind(out, cbind(ID = rep(i, length(seglen)), value = seglen))
}
rownames(out) <- NULL
data.frame(out, stringsAsFactors = F)
}
getBPnum <- function(abs_profiles, chrlen)
{
out <- c()
samps <- getSampNames(abs_profiles)
for (i in samps)
{
if (class(abs_profiles) == "QDNAseqCopyNumbers")
{
segTab <-
getSegTable(abs_profiles[, which(colnames(abs_profiles) == i)])
} else
{
segTab <- abs_profiles[[i]]
colnames(segTab)[4] <- "segVal"
}
# unify chromosome column
segTab$chromosome = as.character(segTab$chromosome)
segTab$chromosome = sub(
pattern = "chr",
replacement = "chr",
x = segTab$chromosome,
ignore.case = TRUE
)
if (any(!grepl("chr", segTab$chromosome))) {
segTab$chromosome[!grepl("chr", segTab$chromosome)] = paste0("chr", segTab$chromosome[!grepl("chr", segTab$chromosome)])
}
if (any(grepl("chr23", segTab$chromosome))) {
warning("'23' is not a supported chromosome, related rows will be discarded.")
segTab = segTab[!grepl("chr23", segTab$chromosome),]
}
chrs <- unique(segTab$chromosome)
allBPnum <- c()
for (c in chrs)
{
currseg <- segTab[segTab$chromosome == c,]
intervals <-
seq(1, chrlen[chrlen[, 1] == c, 2] + 10000000, 10000000)
res <-
hist(as.numeric(currseg$end[-nrow(currseg)]),
breaks = intervals,
plot = FALSE)$counts
allBPnum <- c(allBPnum, res)
}
out <-
rbind(out, cbind(ID = rep(i, length(allBPnum)), value = allBPnum))
}
rownames(out) <- NULL
data.frame(out, stringsAsFactors = F)
}
getOscilation <- function(abs_profiles)
{
out <- c()
samps <- getSampNames(abs_profiles)
for (i in samps)
{
if (class(abs_profiles) == "QDNAseqCopyNumbers")
{
segTab <-
getSegTable(abs_profiles[, which(colnames(abs_profiles) == i)])
} else
{
segTab <- abs_profiles[[i]]
colnames(segTab)[4] <- "segVal"
}
# unify chromosome column
segTab$chromosome = as.character(segTab$chromosome)
segTab$chromosome = sub(
pattern = "chr",
replacement = "chr",
x = segTab$chromosome,
ignore.case = TRUE
)
if (any(!grepl("chr", segTab$chromosome))) {
segTab$chromosome[!grepl("chr", segTab$chromosome)] = paste0("chr", segTab$chromosome[!grepl("chr", segTab$chromosome)])
}
if (any(grepl("chr23", segTab$chromosome))) {
warning("'23' is not a supported chromosome, related rows will be discarded.")
segTab = segTab[!grepl("chr23", segTab$chromosome),]
}
chrs <- unique(segTab$chromosome)
oscCounts <- c()
for (c in chrs)
{
currseg <- segTab[segTab$chromosome == c, "segVal"]
currseg <- round(as.numeric(currseg))
if (length(currseg) > 3)
{
prevval <- currseg[1]
count = 0
for (j in 3:length(currseg))
{
if (currseg[j] == prevval & currseg[j] != currseg[j - 1])
{
count <- count + 1
} else{
oscCounts <- c(oscCounts, count)
count = 0
}
prevval <- currseg[j - 1]
}
}
}
out <-
rbind(out, cbind(ID = rep(i, length(oscCounts)), value = oscCounts))
if (length(oscCounts) == 0)
{
out <- rbind(out, cbind(ID = i, value = 0))
}
}
rownames(out) <- NULL
data.frame(out, stringsAsFactors = F)
}
getCentromereDistCounts <-
function(abs_profiles, centromeres, chrlen)
{
out <- c()
samps <- getSampNames(abs_profiles)
for (i in samps)
{
if (class(abs_profiles) == "QDNAseqCopyNumbers")
{
segTab <-
getSegTable(abs_profiles[, which(colnames(abs_profiles) == i)])
} else
{
segTab <- abs_profiles[[i]]
colnames(segTab)[4] <- "segVal"
}
# unify chromosome column
segTab$chromosome = as.character(segTab$chromosome)
segTab$chromosome = sub(
pattern = "chr",
replacement = "chr",
x = segTab$chromosome,
ignore.case = TRUE
)
if (any(!grepl("chr", segTab$chromosome))) {
segTab$chromosome[!grepl("chr", segTab$chromosome)] = paste0("chr", segTab$chromosome[!grepl("chr", segTab$chromosome)])
}
if (any(grepl("chr23", segTab$chromosome))) {
warning("'23' is not a supported chromosome, related rows will be discarded.")
segTab = segTab[!grepl("chr23", segTab$chromosome),]
}
chrs <- unique(segTab$chromosome)
all_dists <- c()
for (c in chrs)
{
if (nrow(segTab) > 1)
{
starts <- as.numeric(segTab[segTab$chromosome == c, 2])[-1]
segstart <-
as.numeric(segTab[segTab$chromosome == c, 2])[1]
ends <-
as.numeric(segTab[segTab$chromosome == c, 3])
segend <- ends[length(ends)]
ends <- ends[-length(ends)]
# centstart <-
# as.numeric(centromeres[substr(centromeres[, 2], 4, 5) == c, 3])
# centend <-
# as.numeric(centromeres[substr(centromeres[, 2], 4, 5) == c, 4])
# chrend <- chrlen[substr(chrlen[, 1], 4, 5) == c, 2]
centstart <-
as.numeric(centromeres[centromeres$chrom == c, 2])
centend <-
as.numeric(centromeres[centromeres$chrom == c, 3])
chrend <- chrlen[chrlen$chrom == c, 2]
ndist <-
cbind(rep(NA, length(starts)), rep(NA, length(starts)))
ndist[starts <= centstart, 1] <-
(centstart - starts[starts <= centstart]) / (centstart - segstart) * -1
ndist[starts >= centend, 1] <-
(starts[starts >= centend] - centend) / (segend - centend)
ndist[ends <= centstart, 2] <-
(centstart - ends[ends <= centstart]) / (centstart - segstart) * -1
ndist[ends >= centend, 2] <-
(ends[ends >= centend] - centend) / (segend - centend)
ndist <- apply(ndist, 1, min)
all_dists <- rbind(all_dists, sum(ndist > 0))
all_dists <- rbind(all_dists, sum(ndist <= 0))
}
}
if (nrow(all_dists) > 0)
{
out <- rbind(out, cbind(ID = i, value = all_dists[, 1]))
}
}
rownames(out) <- NULL
out = data.frame(out, stringsAsFactors = F)
out = na.omit(out)
out
}
getChangepointCN <- function(abs_profiles)
{
out <- c()
samps <- getSampNames(abs_profiles)
for (i in samps)
{
if (class(abs_profiles) == "QDNAseqCopyNumbers")
{
segTab <-
getSegTable(abs_profiles[, which(colnames(abs_profiles) == i)])
}
else
{
segTab <- abs_profiles[[i]]
colnames(segTab)[4] <- "segVal"
}
segTab$segVal[as.numeric(segTab$segVal) < 0] <- 0
chrs <- unique(segTab$chromosome)
allcp <- c()
for (c in chrs)
{
currseg <- as.numeric(segTab[segTab$chromosome == c, "segVal"])
allcp <-
c(allcp, abs(currseg[-1] - currseg[-length(currseg)]))
}
if (length(allcp) == 0)
{
allcp <- 0 #if there are no changepoints
}
out <-
rbind(out, cbind(ID = rep(i, length(allcp)), value = allcp))
}
rownames(out) <- NULL
data.frame(out, stringsAsFactors = F)
}
getCN <- function(abs_profiles)
{
out <- c()
samps <- getSampNames(abs_profiles)
for (i in samps)
{
if (class(abs_profiles) == "QDNAseqCopyNumbers")
{
segTab <-
getSegTable(abs_profiles[, which(colnames(abs_profiles) == i)])
}
else
{
segTab <- abs_profiles[[i]]
colnames(segTab)[4] <- "segVal"
}
segTab$segVal[as.numeric(segTab$segVal) < 0] <- 0
cn <- as.numeric(segTab$segVal)
out <-
rbind(out, cbind(ID = rep(i, length(cn)), value = cn))
}
rownames(out) <- NULL
data.frame(out, stringsAsFactors = F)
}
getSampNames <- function(abs_profiles)
{
if (class(abs_profiles) == "QDNAseqCopyNumbers")
{
samps <- colnames(abs_profiles)
}
else
{
samps <- names(abs_profiles)
}
samps
}
getSegTable <- function(x)
{
if (!requireNamespace("Biobase", quietly = TRUE)){
stop("Package \"Biobase\" needed for this function to work. Please install it.",
call. = FALSE)
}
dat <- x
sn <- Biobase::assayDataElement(dat, "segmented")
fd <- Biobase::fData(dat)
fd$use -> use
fdfiltfull <- fd[use,]
sn <- sn[use,]
segTable <- c()
for (c in unique(fdfiltfull$chromosome))
{
snfilt <- sn[fdfiltfull$chromosome == c]
fdfilt <- fdfiltfull[fdfiltfull$chromosome == c,]
sn.rle <- rle(snfilt)
starts <-
cumsum(c(1, sn.rle$lengths[-length(sn.rle$lengths)]))
ends <- cumsum(sn.rle$lengths)
lapply(1:length(sn.rle$lengths), function(s) {
from <- fdfilt$start[starts[s]]
to <- fdfilt$end[ends[s]]
segValue <- sn.rle$value[s]
c(fdfilt$chromosome[starts[s]], from, to, segValue)
}) -> segtmp
segTableRaw <-
data.frame(matrix(unlist(segtmp), ncol = 4, byrow = T), stringsAsFactors =
F)
segTable <- rbind(segTable, segTableRaw)
}
colnames(segTable) <- c("chromosome", "start", "end", "segVal")
segTable
}
getPloidy <- function(abs_profiles)
{
out <- c()
samps <- getSampNames(abs_profiles)
for (i in samps)
{
if (class(abs_profiles) == "QDNAseqCopyNumbers")
{
segTab <-
getSegTable(abs_profiles[, which(colnames(abs_profiles) == i)])
}
else
{
segTab <- abs_profiles[[i]]
colnames(segTab)[4] <- "segVal"
}
segLen <-
(as.numeric(segTab$end) - as.numeric(segTab$start))
ploidy <-
sum((segLen / sum(segLen)) * as.numeric(segTab$segVal))
out <- c(out, ploidy)
}
data.frame(out, stringsAsFactors = F)
}
normaliseMatrix <- function(signature_by_sample, sig_thresh = 0.01)
{
norm_const <- colSums(signature_by_sample)
sample_by_signature <-
apply(signature_by_sample, 1, function(x) {
x / norm_const
})
sample_by_signature <-
apply(sample_by_signature, 1, lower_norm, sig_thresh)
signature_by_sample <- t(sample_by_signature)
norm_const <- apply(signature_by_sample, 1, sum)
sample_by_signature <-
apply(signature_by_sample, 2, function(x) {
x / norm_const
})
signature_by_sample <- t(sample_by_signature)
signature_by_sample
}
lower_norm <- function(x, sig_thresh = 0.01)
{
new_x <- x
for (i in 1:length(x))
{
if (x[i] < sig_thresh)
{
new_x[i] <- 0
}
}
new_x
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.