Nothing
#' Preclustering method
#'
#' This method clusters mutations based on the probability that they are from the same
#' distribution.
#' It first computes the zscore associated with a normalized number of alternative reads and depth.
#' The "normalized" number of reads is the number of alternative reads expected if the mutation was at a single
#' copy in a diploid genome.
#'
#' @param Schrod_cells The classic output from Schrodinger function
#' @return returns a list with:
#' \describe{
#' \item{similarityMatrix}{ The matrix of probabilities}
#' \item{distance}{The dissimilarity matrix}
#' \item{tree}{The tree obtained by hierachical clustering of the dissimilarity matrix using "ward.D2" method}
#' }
#'
#'@importFrom stats hclust as.dist cutree hclust
Cellular_preclustering<-function(Schrod_cells){
for(i in 1:length(Schrod_cells)){
Schrod_cells[[i]]$Norm_Alt<-round(
Schrod_cells[[i]]$Alt * Schrod_cells[[i]]$NCh / (2 * Schrod_cells[[i]]$NC)
)
}
DistMat<-ProbDistMatrix(Schrod_cells)
dissimMatrix<-as.dist(1-DistMat)
tree<-hclust(d = dissimMatrix,method = "ward.D2")
#cut_tree<-cutree(tree = tree,k = majority)
result<-list(similarityMatrix = DistMat,
distance = dissimMatrix,
tree = tree
)
result
}
#' Create priors from hierarchical clustering
#'
#' Creates weights and position priors from the hierachical clustering (tree) given
#' a number of clusters Nclust. The centers of each cluster is found by
#' \eqn{Center = \frac{\sum\limits_{m \in cluster}{Normalized Alt_{m}}}{\sum\limits_{m \in cluster}{Normalized Alt_{m}}}}
#' @param tree The tree generated by Cellular_preclustering
#' @param Schrod_cells The classic output from Schrodinger function
#' @param NClus the number of clusters to cut the data
#' @param jitter Should it jitter weights and centers around values found?
#' @return returns a list with:
#' \describe{
#' \item{weigths}{The proportion of mutations in each cluster}
#' \item{centers}{A list with a numeric vector for each sample, with the cellularity of each cluster}
#' }
#'
#' @importFrom stats cutree
Create_prior_cutTree<-function(tree,Schrod_cells,NClus,jitter = FALSE){
for(i in 1:length(Schrod_cells)){
Schrod_cells[[i]]$Norm_Alt<-round(
Schrod_cells[[i]]$Alt * Schrod_cells[[i]]$NCh / (2 * Schrod_cells[[i]]$NC)
)
}
clustering<-cutree(tree,k = NClus)
weights<-table(clustering)/length(clustering)
if(jitter){
weights<-jitter(weights)
weights[weights<0]<-0
weights<-weights/sum(weights)
}
centers<-list()
for(i in 1:length(Schrod_cells)){
if(jitter){
centers[[i]]<-jitter(2 * unlist(sapply(as.numeric(names(weights)),
function(cl){
sum(Schrod_cells[[i]]$Norm_Alt[clustering==cl])/
sum(Schrod_cells[[i]]$Depth[clustering==cl])
}
)
)
)
}
else{
centers[[i]]<-2 * unlist(sapply(as.numeric(names(weights)),
function(cl){
sum(Schrod_cells[[i]]$Norm_Alt[clustering==cl])/
sum(Schrod_cells[[i]]$Depth[clustering==cl])
}
)
)
}
centers[[i]][centers[[i]]>1]<-1
centers[[i]][centers[[i]]<=0]<-.Machine$double.eps
}
list(weights = weights,centers = centers)
}
#' Distance
#'
#' Creates a matrix of distance between two points based on the p-value to be from the same distribution
#' @param Schrod_cells The classic output from Schrodinger function, with the normalized Alt
#' @return returns a square numeric matrix
#' @importFrom stats pnorm
ProbDistMatrix<-function(Schrod_cells){
result<-matrix(0,nrow = nrow(Schrod_cells[[1]]),ncol = nrow(Schrod_cells[[1]]))
for(l in 1:length(Schrod_cells)){
### z-score should be >0
result<-result+zscore(Depth = Schrod_cells[[l]]$Depth,
Alt = Schrod_cells[[l]]$Norm_Alt
)
}
2*pnorm(-result**(1/2))
}
#' Z-score
#'
#' Computes the z-score of mutations being from the same distribution
#' @param Depth a numeric vector of depth of sequencing for each variant
#' @param Alt a numeric vector of the number of reads supporting each variant
#' @return returns a square numeric matrix
zscore<-function(Depth,Alt){
n<-length(Depth)
result<-matrix(ncol = n,nrow = n)
for(i in 1:n){
p<-(Alt+Alt[i])/(Depth + Depth[i])
w0<-which(p==0)
w1<-which(p>1)
p1<-Alt[i]/Depth[i]
p2<-Alt/Depth
result[,i]<-((p1-p2)**2)/(p*(1-p)*(1/Depth[i]+1/Depth))
result[w0,i]<-0
result[w1,i]<-+Inf
result[is.na(result[,i]),i]<-+Inf
}
result
}
#' Majority vote
#'
#' Extract majority vote from multiple indices
#' @param index vector with number of clusters selected by indices
#' @return Numeric value of the number of clusters to chose
MajorityVote<-function(index){
tab<-table(index)
test<-tab==max(tab)
if(sum(test)>1){ # return weighted center
return(
round(sum(tab[test]*as.numeric(names(tab[test])))/sum(tab[test]))
)
}
else{
return(names(which.max(tab)))
}
}
#' Flash QuantumClone
#'
#' Fast method to find clones without filtering for multiple states
#' @param Cells Input for QuantumClone with genotype required
#' @param conta vector with contamination fraction in each sample
#' @param Nclus vector with the number of clusters to test (alternatively only min and max values)
#' @param model.selection One of "tree", "AIC", "BIC" or numeric. "tree" will use "ccc","ch" and "gap" methods from NbClust
#' to determine the number of clusters. "BIC","AIC" or numeric values will use methods from QuantumClone.
#' @examples
#' set.seed(123)
#' #1: Cluster data
#' In<-QuantumClone::Input_Example
#' FQC<-FlashQC(In,conta = c(0,0),Nclus = 2:10)
#'
#' #2: Get order variants by clones:
#' ord<-order(In[[1]]$Chr)
#' #3: Visualize clustering:
#' image(
#' 1:nrow(In[[1]]),
#' 1:nrow(In[[1]]),
#' FQC$similarity[ord,ord],
#' xlab="", ylab="")
#' #4: add limit of real clusters:
#' abline(h = cumsum(table(In[[1]]$Chr[ord]))+1)
#' abline(v = cumsum(table(In[[1]]$Chr[ord]))+1)
#'
#' #5: alternatively add clusters found:
#' ord<-order(FQC$cluster)
#' image(
#' 1:nrow(In[[1]]),
#' 1:nrow(In[[1]]),
#' FQC$similarity[ord,ord],
#' xlab="", ylab="")
#' abline(h = cumsum(table(FQC$cluster[ord]))+1)
#' abline(v = cumsum(table(FQC$cluster[ord]))+1)
#' # Show clustering quality:
#' NMI_cutree( FQC$cluster,chr = In[[1]]$Chr)
#' @export
#' @importFrom NbClust NbClust
#' @seealso QuantumClone
FlashQC<-function(Cells,conta,Nclus,model.selection = "tree"){
if(is.data.frame(Cells)){
Cells<-list(Cells)
}
else if(!is.list(Cells)){
stop("Incorrect input: Cells... exiting")
}
message("Processing input data...")
Schrod_cells<-Patient_schrodinger_cellularities(SNV_list = Cells,
contamination = conta,
Genotype_provided = TRUE)
if(nrow(Schrod_cells[[1]])==nrow(Cells[[1]])){
recluster<-FALSE
}
else{
recluster<-TRUE
}
for(i in 1:length(Schrod_cells)){
Schrod_cells[[i]]$Norm_Alt<-round(
Schrod_cells[[i]]$Alt * Schrod_cells[[i]]$NCh / (2 * Schrod_cells[[i]]$NC)
)
}
message("Clustering...")
DistMat<-ProbDistMatrix(Schrod_cells)
dissimMatrix<-as.dist(1-DistMat)
tree<-hclust(d = dissimMatrix,method = "ward.D2")
##################################
### Find correct number of clusters
##################################
majority<-FLASH_main(Schrod_cells = Schrod_cells,
model.selection = model.selection,
conta = conta,Nclus = Nclus,tree = tree,
dissimMatrix = dissimMatrix)
cut_tree<-cutree(tree = tree,k = majority)
priors<-Create_prior_cutTree(tree = tree,
Schrod_cells = Schrod_cells,
NClus = majority
)
if(recluster){
message("Keeping most likely positions and reclustering...")
adj.factor<-Compute.adj.fact(Schrod_cells)
fik<-eval.fik(Schrod = Schrod_cells,
centers = priors$centers,
weights = priors$weights,
adj.factor = adj.factor,
keep.all.poss = TRUE)
filtered.data<-filter_on_fik(Schrod_cells,fik = fik)
DistMat<-ProbDistMatrix(filtered.data)
dissimMatrix<-as.dist(1-DistMat)
tree<-hclust(d = dissimMatrix,method = "ward.D2")
majority<-FLASH_main(Schrod_cells = filtered.data,
model.selection = model.selection,
conta = conta,Nclus = Nclus,tree = tree,
dissimMatrix = dissimMatrix)
cut_tree<-cutree(tree = tree,k = majority)
priors<-Create_prior_cutTree(tree = tree,
Schrod_cells = filtered.data,
NClus = majority
)
result<-list(similarity = DistMat,
distance = dissimMatrix,
tree = tree,
cluster = cut_tree,
majority = majority,
weights = priors$weights,
centers = priors$centers,
filtered.data = filtered.data
)
}
else{
result<-list(similarity = DistMat,
distance = dissimMatrix,
tree = tree,
cluster = cut_tree,
majority = majority,
weights = priors$weights,
centers = priors$centers,
filtered.data = Schrod_cells
)
}
result
}
#' Compute value of objective function
#'
#' Compute the value of clustering based on same principles as QuantumClone EM
#' @param tree Tree from hierarchical clustering
#' @param nclus Number of clusters used for cutting (numeric of length 1)
#' @param Schrod Output from Schrodinger cellularities
#' @param conta Numeric value of contamination fraction in each sample
Compute_objective<-function(tree,nclus,Schrod,conta){
priors<-Create_prior_cutTree(tree = tree,Schrod_cells = Schrod,NClus = nclus)
adj.factor<-Compute.adj.fact(Schrod)
fik<-eval.fik(Schrod = Schrod,
centers = priors$centers,
weights = priors$weights,
adj.factor = adj.factor,
keep.all.poss = TRUE
)
w<-which(fik == 0)
if(length(w)){
fik<--fik *log(fik )
fik[w]<-0
return(sum(fik))
}
else{
return(-sum(fik *log(fik )))
}
}
#' Compute criterion FLASH
#'
#' Computes BIC from a list of outputs of EM algorithm, then returns the position with minimal BIC
#' @param Obj Numeric vector with objective function values
#' @param Mut_num Number of mutations to cluster
#' @param k the number of clusters (in the same order as Obj)
#' @param model.selection The function to minimize for the model selection: can be "AIC", "BIC", or numeric. In numeric, the function
#' @param s Number of samples
#' @keywords EM clustering number
BIC_criterion_FLASH<-function(Obj,Mut_num,k ,model.selection,s){
### Criterion should be minimized
# Here we assimilate EM.output$val to -ln(L) where L is the likelihood of the model
# BIC is written -2*ln(L)+k*ln(k)
# Generalized BIC is written -2*ln(L)+q * k*ln(k)
# AIC is written 2*k - 2*ln(L)
if(is.numeric(model.selection)){
### Modified BIC to relax or add constraints on model selection
# if q > 1 adding explicative variables should explain observed values better => control overfitting
# if q = 1 BIC
# if q < 1 adding explicative variables is less costly
Bic<-numeric()
#k<-length(EM_out_list[[i]]$EM.output$centers[[1]])
Bic<-2*Obj+model.selection * s * k *log(Mut_num)
return(Bic)
}
else if(model.selection == "BIC"){
Bic<-2*Obj +s* k *log(Mut_num)
return(Bic)
}
else if(model.selection == "AIC"){
Aic<-2*Obj+2*k*s
return(Aic)
}
}
#' Flash core
#'
#' Returns number of clusters based on model selection
#' @param Schrod_cells Output from Schrodinger cellularities
#' @param conta vector with contamination fraction in each sample
#' @param Nclus vector with the number of clusters to test (alternatively only min and max values)
#' @param model.selection One of "tree", "AIC", "BIC" or numeric. "tree" will use "ccc","ch" and "gap" methods from NbClust
#' to determine the number of clusters. "BIC","AIC" or numeric values will use methods from QuantumClone.
#' @param tree Hierarchical tree from hclust
#' @param dissimMatrix Dissimilarity matrix, required if model selection is "tree"
FLASH_main<-function(Schrod_cells,model.selection,conta,Nclus,tree = NULL,dissimMatrix=NULL){
if(grepl(x = "tree",pattern = model.selection)){
### Using NbClust
Cells<-matrix(data = 0,ncol = length(Schrod_cells),nrow = nrow(Schrod_cells[[1]]))
for(i in 1:length(Schrod_cells)){
Cells[,i]<-Schrod_cells[[i]]$Alt/Schrod_cells[[i]]$Depth*
Schrod_cells[[i]]$NCh/(Schrod_cells[[i]]$NC * 1-conta[i])
}
index<- c("ch","ccc","gap")
method<-"ward.D2"
if(ncol(Cells)==1){
Cells<-cbind(Cells,jitter(Cells))
}
selected<-unlist(sapply(X =index,function(name){
NbClust::NbClust(data = Cells,diss = dissimMatrix, distance = NULL,
method = method,index = name,
min.nc = min(Nclus),
max.nc = max(Nclus)
)$Best.nc["Number_clusters"]
})
)
majority<-MajorityVote(selected)
}else{
### Compute objective for each cluster value
obj<-sapply(Nclus,function(nclus){
Compute_objective(tree = tree,
nclus = nclus,
Schrod = Schrod_cells,
conta = conta)
}
)
### Adapt to BIC selection
if(is.list(Schrod_cells)){
s<-length(Schrod_cells)
}
else{
s<-1
}
Crit<-BIC_criterion_FLASH(Obj = obj,Mut_num = nrow(Schrod_cells[[1]]),
k = Nclus,
model.selection = model.selection,
s = s
)
majority<-Nclus[which.min(Crit)]
}
#tree<-hclust(d = dissimMatrix,method = method)
majority
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.