R/RunDPClust.R

Defines functions RunDPClust

Documented in RunDPClust

#--------------------------#
# Function to run DPClust
#--------------------------#

#' @name
#' RunDPClust_Beagle_nf
#'
#' @title
#' Runs DPClust using input provided in GenerateDPinputt2BamAC_Beagle_nf
#'
#' @description
#' Clusters mutations by CCF using Dirichlet Process
#'
#' @param
#' Tumour platekey
#' Germline platekey
#' Filepath to purity and ploidy file from Battenberg
#' Filepath to input file generated by GenerateDPinputt2BamAC_Beagle_nf
#' Output directory
#'
#' @return
#' Cluster assignment for all SNVs
#' Plots showing the clusters
#' OptimaInfo file summarising the clusters
#' And the CCF and number of mutations assigned to each


RunDPClust <- function(sampledir,participantid,tumourplatekey,normalplatekey,dpinfofilepath,gender,vcffilepath,rhoandpsifilepath,subcloneshg38filepath,run) {

        # define nucleotides
        nucleotides = c("A","C","G","T")
        # samplename
        samplename=paste0("tumo",tumourplatekey,"_norm",normalplatekey)
        # print sample running
        print(paste0(tumourplatekey," ",normalplatekey," preparing"))

        # get run directories
        if (run==1) {
                dpcoveralldir = "/P-DPC1/"
                dpcdir = "/P-DPC1/DPClust/"
                dpcprepdir = "/P-DPC1/DPPrep/"
                subdir = "/O-Postprocessing/"
                puritydir = "/M-CallSubclones/"
                assessmentdir = "/Q-AssessBB1DPC1/"
        } else if (run==2) {
                dpcoveralldir = "/S-DPC2/"
                dpcdir = "/S-DPC2/DPClust/"
                dpcprepdir = "/S-DPC2/DPPrep/"
                subdir = "/R-BB2/O-Postprocessing/"
                puritydir = "/R-BB2/M-CallSubclones/"
                assessmentdir = "/T-AssessBB2DPC2/"
        } else if (run==3) {
                dpcoveralldir = "/V-DPC3/"
                dpcdir = "/V-DPC3/DPClust/"
                dpcprepdir = "/V-DPC3/DPPrep/"
                subdir = "/U-BB3/O-Postprocessing/"
                puritydir = "/U-BB3/M-CallSubclones/"
                assessmentdir = "/W-AssessBB3DPC3/"
        } else if (run==4) {
                dpcoveralldir = "/Y-DPC4/"
                dpcdir = "/Y-DPC4/DPClust/"
                dpcprepdir = "/Y-DPC4/DPPrep/"
                subdir = "/X-BB4/O-Postprocessing/"
                puritydir = "/X-BB4/M-CallSubclones/"
                assessmentdir = "/Z-AssessBB4DPC4/"
        } else if (run=="WGD") {
          if (file.exists(paste0(sampledir,"/Z-AssessBB4DPC4/",tumourplatekey,"_peaks_output.Rdata"))) {
                dpcoveralldir = "/ZZ-DPC_WGD/"
                dpcdir = "/ZZ-DPC_WGD/DPClust/"
                dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
                subdir = "/X-BB4/O-Postprocessing/"
                puritydir = "/X-BB4/M-CallSubclones/"
                assessmentdir = "/ZZ-AssessBBDPC_WGD/"
          } else if (file.exists(paste0(sampledir,"/W-AssessBB3DPC3/",tumourplatekey,"_peaks_output.Rdata"))) {
                dpcoveralldir = "/ZZ-DPC_WGD/"
                dpcdir = "/ZZ-DPC_WGD/DPClust/"
                dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
                subdir = "/U-BB3/O-Postprocessing/"
                puritydir = "/U-BB3/M-CallSubclones/"
                assessmentdir = "/ZZ-AssessBBDPC_WGD/"
          } else if (file.exists(paste0(sampledir,"/T-AssessBB2DPC2/",tumourplatekey,"_peaks_output.Rdata"))) {
                dpcoveralldir = "/ZZ-DPC_WGD/"
                dpcdir = "/ZZ-DPC_WGD/DPClust/"
                dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
                subdir = "/R-BB2/O-Postprocessing/"
                puritydir = "/R-BB2/M-CallSubclones/"
                assessmentdir = "/ZZ-AssessBBDPC_WGD/"
          } else if (file.exists(paste0(sampledir,"/Q-AssessBB1DPC1/",tumourplatekey,"_peaks_output.Rdata"))) {
                dpcoveralldir = "/ZZ-DPC_WGD/"
                dpcdir = "/ZZ-DPC_WGD/DPClust/"
                dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
                subdir = "/O-Postprocessing/"
                puritydir = "/M-CallSubclones/"
                assessmentdir = "/ZZ-AssessBBDPC_WGD/"
          }
        }

        if(file.exists(rhoandpsifilepath) & file.exists(dpinfofilepath)){

	              cellularity = read.table(rhoandpsifilepath,header=T,stringsAsFactors=F,sep="\t")[2,1]

	              DP.info = read.table(dpinfofilepath,sep="\t",header=T, stringsAsFactors=F)

              	#filter out VAF=0 and mutations without copy number info
              	DP.info = DP.info[!is.na(DP.info$subclonal.fraction) & DP.info$subclonal.fraction>0,]

              	mutCount = array(DP.info$mut.count,c(nrow(DP.info),1))
              	WTCount = array(DP.info$WT.count,c(nrow(DP.info),1))
              	totalCopyNumber = array(DP.info$subclonal.CN,c(nrow(DP.info),1))
              	copyNumberAdjustment = array(DP.info$no.chrs.bearing.mut,c(nrow(DP.info),1))
              	mutation.copy.number = array(DP.info$mutation.copy.number,c(nrow(DP.info),1))

	              no.iters=1000

	              output_folder=paste0(sampledir,dpcdir)

	              print(paste("This is run",run))
	              print(paste("Using cellularity file",rhoandpsifilepath))
	              print(paste("Using dpinfo file",dpinfofilepath))
	              print(paste("Writing output to",output_folder))

                #This function does everything. It may be better to run separate functions to perform Gibbs sampling
	              # and mutation assignment
                DirichletProcessClustering(mutCount = mutCount,
                                           WTCount = WTCount,
              	                           totalCopyNumber = totalCopyNumber,
              	                           copyNumberAdjustment = copyNumberAdjustment,
              	                           mutation.copy.number = mutation.copy.number,
              	                           cellularity = cellularity,
              	                           output_folder = output_folder,
              	                           no.iters = no.iters,
              	                           no.iters.burn.in = ceiling(no.iters/5),
              	                           subsamplesrun = samplename,
              	                           samplename=samplename,
              	                           conc_param = 1,
              	                           cluster_conc = 5,
              	                           mut.assignment.type = 1,
              	                           most.similar.mut = NA,
              	                           mutationTypes = NA,
              	                           min.frac.snvs.cluster = NA,
              	                           max.considered.clusters=30)

	              print(paste0("Clustering has run for ",tumourplatekey,"_",normalplatekey))

	      } else {

	              print(paste0(rhoandpsifilepath," or ",dpinfofilepath," not found, exiting."))
        }

}
afrangou/CleanCNA documentation built on Dec. 28, 2021, 8:21 p.m.