##############################################################################################################################
# Additional parsers
##############################################################################################################################
#' Helper function to read in the DP algorithm files
#' @param indir The directory in which the files were stored
#' @return A GS.data object with fields S.i, V.h, pi.h and alpha
#' @author sd11
read_gsdata_object = function(indir, samplename, no.iters, conc_param, cluster_conc) {
GS.data = list()
filename_prefix = file.path(indir, paste(samplename, "_2D_iters", no.iters, "_concParam", conc_param, "_clusterWidth", 1/cluster_conc, sep=""))
GS.data$S.i = as.matrix(read.csv(paste(filename_prefix, "_states.csv", sep=""), row.names=1))
GS.data$V.h = as.matrix(read.csv(paste(filename_prefix, "_stickbreaking_weights.csv", sep=""), row.names=1))
GS.data$pi.h = as.matrix(read.csv(paste(filename_prefix, "_discreteMutationCopyNumbers.csv", sep=""), row.names=1))
GS.data$alpha = read.csv(paste(filename_prefix, "_alpha.csv", sep=""), row.names=1)
# Fix the dimensions of this array
pi.h = array(NA, c(nrow(GS.data$pi.h), ncol(GS.data$pi.h), 1))
pi.h[1:nrow(GS.data$pi.h), 1:ncol(GS.data$pi.h), 1] = GS.data$pi.h
GS.data$pi.h = pi.h
return(GS.data)
}
##############################################################################################################################
# Functions to build mutation coassignment probability matrices
##############################################################################################################################
#' Helper function that builds a mutation to mutation coassignment probability matrix out of mutation to cluster assignments
#' @param GS.data Main clustering output directly from the chain
#' @param density Posterior density through the MCMC output
#' @param no.muts The number of mutations in the data set
#' @param sampledIters Iterations to consider
#' @return A no.muts x no.muts matrix where each cell contains the fraction of iterations the pair of mutations was assigned to the same cluster
build_coassignment_prob_matrix_preferences = function(GS.data, density, no.muts, sampledIters) {
res = getLocalOptima(density, hypercube.size=5)
localOptima = res$localOptima
peak.indices = res$peak.indices
no.optima = length(localOptima)
boundary = array(NA,no.optima-1)
mutation.preferences = array(0,c(no.muts,no.optima))
for(i in 1:(no.optima-1)){
min.density = min(density$median.density[(peak.indices[i]+1):(peak.indices[i+1]-1)])
min.indices = intersect(which(density$median.density == min.density),(peak.indices[i]+1):(peak.indices[i+1]-1))
#what distance along the line between a pair of optima do we have to go to reach the minimum density
boundary[i] = (density$fraction.of.tumour.cells[max(min.indices)] + density$fraction.of.tumour.cells[min(min.indices)])/2
}
# Adapt this to make a table with the preferred cluster for each mutation in each iteration
S.i = data.matrix(GS.data$S.i)
pi.h = GS.data$pi.h[,,1]
mutation.preferences = array(0, c(no.muts, length(sampledIters)))
coassignments = array(0, c(no.muts, no.muts))
for(s in sampledIters) {
#temp.preferences = array(0,c(no.muts,no.optima))
for(c in unique(S.i[s,])){
bestOptimum = sum(pi.h[s,c]>boundary)+1
#temp.preferences[S.i[s,]==c,bestOptimum] = temp.preferences[S.i[s,]==c,bestOptimum] + 1
assigned.muts = which(S.i[s,]==c)
coassignments[assigned.muts, assigned.muts] = coassignments[assigned.muts, assigned.muts] + 1
}
#iter.preferences = t(apply(temp.preferences, 1, function(p) { as.integer(p==max(p)) / sum(p==max(p)) }))
#mutation.preferences = mutation.preferences + iter.preferences
}
coassignments = coassignments / length(sampledIters)
return(coassignments)
}
#' Fetch the CCFs of the clusters to which SNVs have been assigned during MCMC into a big table
#' @param pi.h The cluster CCFs
#' @param S.i The mutation assignments to cluster ids
#' @param no.muts Number of total SNVs
#' @param no.iters The total number of iterations
#' @param no.iters.burn.in The total number of iterations to use as burnin
#' @return A matrix with a column for each SNV and a row for each to consider iteration with the cell containing the CCF
#' @author sd11
get_snv_assignment_ccfs = function(pi.h, S.i, no.muts, no.timepoints, no.iters, no.iters.burn.in) {
#' Get assignment ccfs for each snv
no.iters.post.burnin = no.iters-no.iters.burn.in
snv_ccfs = array(NA, c(no.iters.post.burnin, no.muts, no.timepoints))
x = (no.iters.burn.in+1):no.iters
for (t in 1:no.timepoints) {
for (i in 1:no.muts) {
snv_ccfs[, i, t] = pi.h[cbind(x, S.i[-c(1:no.iters.burn.in), i], t)]
}
}
return(snv_ccfs)
}
#' Helper function that builds the a density over assignment CCFs for each mutation
get_snv_ccf_assignmnent_density = function(S.i, pi.h, no.iters.burn.in, ccf_max_value=10) {
snv_ccfs = get_snv_assignment_ccfs(pi.h, S.i, ncol(S.i), dim(pi.h)[3], nrow(S.i), no.iters.burn.in)
snv_ccfs = snv_ccfs[,,1]
snv_densities = lapply(1:ncol(snv_ccfs), function(i) {
if (all(snv_ccfs[,i] < ccf_max_value)) {
ggplot_build(ggplot(data.frame(ccf=snv_ccfs[,i])) + aes(x=ccf, y=..density..) + geom_density() + xlim(0, ccf_max_value))$data[[1]]$y
} else {
NA
}
})
snv_densities = lapply(snv_densities, function(dat) { dat/sum(dat) })
return(snv_densities)
}
#' Build a coassignment probability matrix using SNV specific densities over
#' the assigned CCFs during MCMC
#' @param S.i The mutation assignments to cluster ids
#' @param pi.h The cluster CCFs
#' @param no.iters.burn.in The number of iterations to discard as burn in
#' @return Square matrix with a row and column for every mutation, each cell contains the probability of the pair of mutations belonging to the same cluster
#' @author dw9, sd11
build_coassignment_prob_matrix_densities = function(S.i, pi.h, no.iters.burn.in) {
snv_densities = get_snv_ccf_assignmnent_density(S.i, pi.h, no.iters.burn.in)
no.muts = length(snv_densities)
identity.strengths = array(0,c(no.muts,no.muts))
for (i in 1:(no.muts-1)) {
identity.strengths[i,i] = 1
for(j in (i+1):no.muts) {
if (is.na(snv_densities[[i]]) | is.na(snv_densities[[j]])) {
identity.strengths[i,j] = identity.strengths[j,i] = 0
} else {
identity.strengths[i,j] = identity.strengths[j,i] = 1-(sum(abs(snv_densities[[i]]-snv_densities[[j]])) / 2)
}
}
}
identity.strengths[no.muts,no.muts] = 1
return(identity.strengths)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.