#--------------------------#
# Function to run DPClust
#--------------------------#
#' @name
#' RunDPClust_Beagle_nf
#'
#' @title
#' Runs DPClust using input provided in GenerateDPinputt2BamAC_Beagle_nf
#'
#' @description
#' Clusters mutations by CCF using Dirichlet Process
#'
#' @param
#' Tumour platekey
#' Germline platekey
#' Filepath to purity and ploidy file from Battenberg
#' Filepath to input file generated by GenerateDPinputt2BamAC_Beagle_nf
#' Output directory
#'
#' @return
#' Cluster assignment for all SNVs
#' Plots showing the clusters
#' OptimaInfo file summarising the clusters
#' And the CCF and number of mutations assigned to each
RunDPClust <- function(sampledir,participantid,tumourplatekey,normalplatekey,dpinfofilepath,gender,vcffilepath,rhoandpsifilepath,subcloneshg38filepath,run) {
# define nucleotides
nucleotides = c("A","C","G","T")
# samplename
samplename=paste0("tumo",tumourplatekey,"_norm",normalplatekey)
# print sample running
print(paste0(tumourplatekey," ",normalplatekey," preparing"))
# get run directories
if (run==1) {
dpcoveralldir = "/P-DPC1/"
dpcdir = "/P-DPC1/DPClust/"
dpcprepdir = "/P-DPC1/DPPrep/"
subdir = "/O-Postprocessing/"
puritydir = "/M-CallSubclones/"
assessmentdir = "/Q-AssessBB1DPC1/"
} else if (run==2) {
dpcoveralldir = "/S-DPC2/"
dpcdir = "/S-DPC2/DPClust/"
dpcprepdir = "/S-DPC2/DPPrep/"
subdir = "/R-BB2/O-Postprocessing/"
puritydir = "/R-BB2/M-CallSubclones/"
assessmentdir = "/T-AssessBB2DPC2/"
} else if (run==3) {
dpcoveralldir = "/V-DPC3/"
dpcdir = "/V-DPC3/DPClust/"
dpcprepdir = "/V-DPC3/DPPrep/"
subdir = "/U-BB3/O-Postprocessing/"
puritydir = "/U-BB3/M-CallSubclones/"
assessmentdir = "/W-AssessBB3DPC3/"
} else if (run==4) {
dpcoveralldir = "/Y-DPC4/"
dpcdir = "/Y-DPC4/DPClust/"
dpcprepdir = "/Y-DPC4/DPPrep/"
subdir = "/X-BB4/O-Postprocessing/"
puritydir = "/X-BB4/M-CallSubclones/"
assessmentdir = "/Z-AssessBB4DPC4/"
} else if (run=="WGD") {
if (file.exists(paste0(sampledir,"/Z-AssessBB4DPC4/",tumourplatekey,"_peaks_output.Rdata"))) {
dpcoveralldir = "/ZZ-DPC_WGD/"
dpcdir = "/ZZ-DPC_WGD/DPClust/"
dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
subdir = "/X-BB4/O-Postprocessing/"
puritydir = "/X-BB4/M-CallSubclones/"
assessmentdir = "/ZZ-AssessBBDPC_WGD/"
} else if (file.exists(paste0(sampledir,"/W-AssessBB3DPC3/",tumourplatekey,"_peaks_output.Rdata"))) {
dpcoveralldir = "/ZZ-DPC_WGD/"
dpcdir = "/ZZ-DPC_WGD/DPClust/"
dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
subdir = "/U-BB3/O-Postprocessing/"
puritydir = "/U-BB3/M-CallSubclones/"
assessmentdir = "/ZZ-AssessBBDPC_WGD/"
} else if (file.exists(paste0(sampledir,"/T-AssessBB2DPC2/",tumourplatekey,"_peaks_output.Rdata"))) {
dpcoveralldir = "/ZZ-DPC_WGD/"
dpcdir = "/ZZ-DPC_WGD/DPClust/"
dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
subdir = "/R-BB2/O-Postprocessing/"
puritydir = "/R-BB2/M-CallSubclones/"
assessmentdir = "/ZZ-AssessBBDPC_WGD/"
} else if (file.exists(paste0(sampledir,"/Q-AssessBB1DPC1/",tumourplatekey,"_peaks_output.Rdata"))) {
dpcoveralldir = "/ZZ-DPC_WGD/"
dpcdir = "/ZZ-DPC_WGD/DPClust/"
dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
subdir = "/O-Postprocessing/"
puritydir = "/M-CallSubclones/"
assessmentdir = "/ZZ-AssessBBDPC_WGD/"
}
}
if(file.exists(rhoandpsifilepath) & file.exists(dpinfofilepath)){
cellularity = read.table(rhoandpsifilepath,header=T,stringsAsFactors=F,sep="\t")[2,1]
DP.info = read.table(dpinfofilepath,sep="\t",header=T, stringsAsFactors=F)
#filter out VAF=0 and mutations without copy number info
DP.info = DP.info[!is.na(DP.info$subclonal.fraction) & DP.info$subclonal.fraction>0,]
mutCount = array(DP.info$mut.count,c(nrow(DP.info),1))
WTCount = array(DP.info$WT.count,c(nrow(DP.info),1))
totalCopyNumber = array(DP.info$subclonal.CN,c(nrow(DP.info),1))
copyNumberAdjustment = array(DP.info$no.chrs.bearing.mut,c(nrow(DP.info),1))
mutation.copy.number = array(DP.info$mutation.copy.number,c(nrow(DP.info),1))
no.iters=1000
output_folder=paste0(sampledir,dpcdir)
print(paste("This is run",run))
print(paste("Using cellularity file",rhoandpsifilepath))
print(paste("Using dpinfo file",dpinfofilepath))
print(paste("Writing output to",output_folder))
#This function does everything. It may be better to run separate functions to perform Gibbs sampling
# and mutation assignment
DirichletProcessClustering(mutCount = mutCount,
WTCount = WTCount,
totalCopyNumber = totalCopyNumber,
copyNumberAdjustment = copyNumberAdjustment,
mutation.copy.number = mutation.copy.number,
cellularity = cellularity,
output_folder = output_folder,
no.iters = no.iters,
no.iters.burn.in = ceiling(no.iters/5),
subsamplesrun = samplename,
samplename=samplename,
conc_param = 1,
cluster_conc = 5,
mut.assignment.type = 1,
most.similar.mut = NA,
mutationTypes = NA,
min.frac.snvs.cluster = NA,
max.considered.clusters=30)
print(paste0("Clustering has run for ",tumourplatekey,"_",normalplatekey))
} else {
print(paste0(rhoandpsifilepath," or ",dpinfofilepath," not found, exiting."))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.