#--------------------------------------------------------#
# Collect metrics for a run of Battenberg and DPClust
#--------------------------------------------------------#
# Metric collection done per sample
# Probably good to summarise all runs so can quickly identify those which still have a few catch up
# runs to do. This will mean loads of columns though. Hm.
# DPC info collected per sample is:
# And this is done for every run of DPC performed so we can see the improvement in the calls
# number mutations
# number mutations after 1pc removal
# number clusters
# number clusters after 1pc removal
# position clonal cluster (clonal = cluster closest to clonal)
# number mutations clonal cluster
# position top CCF cluster
# number mutations top CCF cluster
# position cluster most mutations
# number mutations cluster most mutations
# position superclonal cluster
# number mutations superclonal cluster
# position biggest subclonal cluster
# number mutations biggest subclonal cluster
# is there a cluster at 50% in dpc
# position of 50% cluster
# number mutations of 50% cluster
# passed dpc
# BB info collected is:
# purity
# ploidy
# genome length
## number of segments with a clonal aberration
## length of genome with a clonal aberration
## number of segments with a subclonal aberration
## length of genome with a subclonal aberration
## number of segments with a clonal LOH
## length of genome with a clonal LOH
## number of segments with a subclonal LOH
## length of genome with a subclonal LOH
## number of segments clonally 1+1
# genome clonally 1+1
##number of segments subclonally 1+1
# genome subclonally 1+1
##number of segments clonally 2+2
# genome clonally 2+2
##number of segments subclonally 2+2
# genome subclonally 2+2
# number of segments that is clonal where either major or minor copy number is odd
# length of genome that has frac1A=1, ie clonal, where either the major or the minor copy number is odd
# number of segments at 50pc (between 0.95/2 ± 0.05)
# length of genome at 50pc
# number mutations on 1 chrom copy
# number mutations on 2 chrom copy
# number mutations on >1 chrom copy
# number of segments clonal homdels
# total length bp clonal homdels
# longest clonal homdel bp
# number of segments subclonal homdels
# total length bp subclonal homdels
# longest subclonal homdel bp
# number of mutations in regions that are clonally homdel
# test
# sampledir = "/home/AFrangou/colorectal/battenberg_results_fixvaf/tumoX_normY"
# participantid = "ID"
# tumourplatekey = "tumoX"
# normalplatekey = "normY"
# run = 1
# lowerclonal = 0.95
# upperclonal = 1.05
AssessBBDPCRun <- function(sampledir,participantid,tumourplatekey,normalplatekey,run,lowerclonal,upperclonal) {
tumournormal = paste0('tumo',tumourplatekey,'_norm',normalplatekey)
sampletumnorm = paste0(participantid,"_",tumournormal)
print(participantid)
print(tumournormal)
print(paste("This is run",run))
# get dpc run optima info
if (run==1) {
dpcdir = "/P-DPC1/DPClust/"
dpcprepdir = "/P-DPC1/DPPrep/"
subdir = "/O-Postprocessing/"
puritydir = "/M-CallSubclones/"
assessmentdir = "/Q-AssessBB1DPC1/"
dir.create(paste0(sampledir,"/Q-AssessBB1DPC1/"))
} else if (run==2) {
dpcdir = "/S-DPC2/DPClust/"
dpcprepdir = "/S-DPC2/DPPrep/"
subdir = "/R-BB2/O-Postprocessing/"
puritydir = "/R-BB2/M-CallSubclones/"
assessmentdir = "/T-AssessBB2DPC2/"
dir.create(paste0(sampledir,"/T-AssessBB2DPC2/"))
} else if (run==3) {
dpcdir = "/V-DPC3/DPClust/"
dpcprepdir = "/V-DPC3/DPPrep/"
subdir = "/U-BB3/O-Postprocessing/"
puritydir = "/U-BB3/M-CallSubclones/"
assessmentdir = "/W-AssessBB3DPC3/"
dir.create(paste0(sampledir,"/W-AssessBB3DPC3/"))
} else if (run==4) {
dpcdir = "/Y-DPC4/DPClust/"
dpcprepdir = "/Y-DPC4/DPPrep/"
subdir = "/X-BB4/O-Postprocessing/"
puritydir = "/X-BB4/M-CallSubclones/"
assessmentdir = "/Z-AssessBB4DPC4/"
dir.create(paste0(sampledir,"/Z-AssessBB4DPC4/"))
} else if (run=="WGD") {
if (file.exists(paste0(sampledir,"/Z-AssessBB4DPC4/",tumourplatekey,"_peaks_output.Rdata"))) {
dpcoveralldir = "/ZZ-DPC_WGD/"
dpcdir = "/ZZ-DPC_WGD/DPClust/"
dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
subdir = "/X-BB4/O-Postprocessing/"
puritydir = "/X-BB4/M-CallSubclones/"
assessmentdir = "/ZZ-AssessBBDPC_WGD/"
} else if (file.exists(paste0(sampledir,"/W-AssessBB3DPC3/",tumourplatekey,"_peaks_output.Rdata"))) {
dpcoveralldir = "/ZZ-DPC_WGD/"
dpcdir = "/ZZ-DPC_WGD/DPClust/"
dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
subdir = "/U-BB3/O-Postprocessing/"
puritydir = "/U-BB3/M-CallSubclones/"
assessmentdir = "/ZZ-AssessBBDPC_WGD/"
} else if (file.exists(paste0(sampledir,"/T-AssessBB2DPC2/",tumourplatekey,"_peaks_output.Rdata"))) {
dpcoveralldir = "/ZZ-DPC_WGD/"
dpcdir = "/ZZ-DPC_WGD/DPClust/"
dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
subdir = "/R-BB2/O-Postprocessing/"
puritydir = "/R-BB2/M-CallSubclones/"
assessmentdir = "/ZZ-AssessBBDPC_WGD/"
} else if (file.exists(paste0(sampledir,"/Q-AssessBB1DPC1/",tumourplatekey,"_peaks_output.Rdata"))) {
dpcoveralldir = "/ZZ-DPC_WGD/"
dpcdir = "/ZZ-DPC_WGD/DPClust/"
dpcprepdir = "/ZZ-DPC_WGD/DPPrep/"
subdir = "/O-Postprocessing/"
puritydir = "/M-CallSubclones/"
assessmentdir = "/ZZ-AssessBBDPC_WGD/"
}
}
dpcfilename=paste0(sampledir,dpcdir,tumournormal,"_optimaInfo.txt")
if (file.exists(dpcfilename)) {
optima = read.table(dpcfilename,header=T,stringsAsFactors = F)
# some output
print(paste("Loaded dpclust optima info file",tumourplatekey,dpcfilename))
# collect dpc metrics across all runs that were done for this sample
dpcmetrics = matrix(NA,nrow=0,ncol=18)
number.mutations=sum(as.numeric(optima[,3]))
number.clusters=nrow(optima)
# remove any cluster with <-1% mutations in
toremove=which(optima[,3]<=number.mutations/100)
if(length(toremove)>0) {optima=optima[-toremove,]}
# total mutations after removal of smaller clusters
number.mutations.post1pcremoval=sum(as.numeric(optima[,3]))
number.clusters.post1pcremoval=nrow(optima)
# position clonal cluster
dists=abs(optima[,2]-(upperclonal+lowerclonal)/2)
position.clonalcluster=optima[which(dists==min(dists)),2]
# number mutations clonal cluster
numbermutations.clonalcluster=optima[which(dists==min(dists)),3]
# position top ccf cluster
position.topccfcluster=optima[nrow(optima),2]
# number mutations top ccf cluster
numbermutations.topccfcluster=optima[nrow(optima),3]
# position cluster most mutations
# a v small number of samples with v few mutations have clusters with the same number
# of mutations in them. choose the top CCF cluster in this situation.
most.muts=which(optima[,3]==max(optima[,3]))
if (length(most.muts)>1) {
position.clustermostmutations=optima[most.muts[length(most.muts)],2]
} else {
position.clustermostmutations=optima[which(optima[,3]==max(optima[,3])),2]
}
# number mutations cluster with most mutations
if (length(most.muts)>1) {
numbermutations.clustermostmutations=optima[most.muts[length(most.muts)],3]
} else {
numbermutations.clustermostmutations=optima[which(optima[,3]==max(optima[,3])),3]
}
# position super clonal cluster
# number mutations superclonal cluster
superclonal=which(optima[,2]>upperclonal)
if(length(superclonal)>0) {
position.superclonalcluster=optima[max(superclonal),2]
numbermutations.superclonalcluster=optima[max(superclonal),3]
} else {
position.superclonalcluster=0
numbermutations.superclonalcluster=0
}
# position biggest subclonal cluster
# number mutations biggest subclonal cluster
subclonal=which(optima[,2]<lowerclonal)
if (length(subclonal)==0) {
position.biggestsubclonalcluster=0
numbermutations.biggestsubclonalcluster=0
} else if (length(subclonal)==1) {
position.biggestsubclonalcluster=optima[which(optima[subclonal,3]==max(optima[subclonal,3])),2]
numbermutations.biggestsubclonalcluster=optima[which(optima[subclonal,3]==max(optima[subclonal,3])),3]
} else if (length(subclonal)>1) {
subclonal.clusters=which(optima[subclonal,3]==max(optima[subclonal,3]))
position.biggestsubclonalcluster=optima[subclonal.clusters[length(subclonal.clusters)],2]
numbermutations.biggestsubclonalcluster=optima[subclonal.clusters[length(subclonal.clusters)],3]
}
# ask if the cluster closest to clonal has the largest number of mutations or has the highest CCF.
if ((optima[which(dists==min(dists)),3]==max(optima[,3]) | optima[which(dists==min(dists)),3]==optima[nrow(optima),3]) & (position.clonalcluster>=lowerclonal & position.clonalcluster<=upperclonal)) {
passeddpc="Yes"
} else {
passeddpc="No"
}
# information about number of chromosomes each mutation is present on
# comes from dpinput file
dpcprepfilename = paste0(sampledir,dpcprepdir,tumournormal,"_DPinput.txt")
dpcprep = read.table(dpcprepfilename,header=T,stringsAsFactors = F)
print(paste("Read dpcprepfile",dpcprepfilename))
# number of mutations present on one chr
no.muts.chr1 = length(which(dpcprep[,16]==1))
# number of mutations present on two chrs
no.muts.chr2 = length(which(dpcprep[,16]==2))
# number of mutations present on more than one chrs
no.muts.chrmore1 = length(which(dpcprep[,16]>1))
# is there a cluster around 0.5 of the clonal cluster
upperbound = (position.clonalcluster/2)+0.05
lowerbound = (position.clonalcluster/2)-0.05
if (length(which(optima$location<upperbound & optima$location>lowerbound))==0) {
cluster.at.50 = FALSE
position.cluster.at.50 = FALSE
number.muts.cluster.at.50 = FALSE
} else {
cluster.at.50 = TRUE
position.cluster.at.50 = optima[which(optima$location<upperbound & optima$location>lowerbound),2]
number.muts.cluster.at.50 = optima[which(optima$location<upperbound & optima$location>lowerbound),3]
}
dpcmetrics = cbind(number.mutations,number.mutations.post1pcremoval,number.clusters,number.clusters.post1pcremoval,
position.clonalcluster,numbermutations.clonalcluster,position.topccfcluster,numbermutations.topccfcluster,
position.clustermostmutations,numbermutations.clustermostmutations,position.superclonalcluster,
numbermutations.superclonalcluster,position.biggestsubclonalcluster,numbermutations.biggestsubclonalcluster,
no.muts.chr1,no.muts.chr2,no.muts.chrmore1,cluster.at.50,position.cluster.at.50,number.muts.cluster.at.50,
passeddpc)
print(paste("Collected DPClust metrics"))
} else {
dpcmetricscolnames = c("number.mutations","number.mutations.post1pcremoval","number.clusters","number.clusters.post1pcremoval",
"position.clonalcluster","numbermutations.clonalcluster","position.topccfcluster","numbermutations.topccfcluster",
"position.clustermostmutations","numbermutations.clustermostmutations","position.superclonalcluster",
"numbermutations.superclonalcluster","position.biggestsubclonalcluster","numbermutations.biggestsubclonalcluster",
"no.muts.chr1","no.muts.chr2","no.muts.chrmore1","cluster.at.50","position.cluster.at.50","number.muts.cluster.at.50",
"passeddpc")
dpcmetrics =matrix(NA,1,21)
colnames(dpcmetrics)=dpcmetricscolnames
print(paste("No DPClust runs completed"))
}
# load subclones and purity files from BB
subfilename = paste0(sampledir,subdir,tumourplatekey,"_subclones_hg38.txt")
purityfilename = paste0(sampledir,puritydir,tumourplatekey,"_cellularity_ploidy.txt")
if (file.exists(subfilename)) {
sub = read.table(subfilename,header=T,stringsAsFactors = F)
# some output
print(paste("Loaded subclones hg38 file",tumourplatekey,subfilename))
purityploidy = read.table(purityfilename,header=T,stringsAsFactors = F)
# some output
print(paste("Loaded cellularity file",tumourplatekey,purityfilename))
# purity and ploidy
purity = purityploidy[1,1]
ploidy = purityploidy[1,3]
# number of segments
segments = nrow(sub)
# genome length
genome.length = as.numeric(sub[,3])-as.numeric(sub[,2])
genome.length = sum(as.numeric(genome.length))
# number of segments clonally diploid
clonallydiploid = sub[which(sub[,8]==1 & sub[,9]==1 & sub[,10]==1),]
segments.clonallydiploid = nrow(clonallydiploid)
# genome length clonally diploid
length.clonallydiploid = sum(as.numeric(clonallydiploid[,3])-as.numeric(clonallydiploid[,2]))
# number of segments subclonally diploid
subclonallydiploid = sub[which((sub[,8]==1 & sub[,9]==1 & sub[,10]!=1) | sub[,11]==1 & sub[,12]==1),]
segments.subclonallydiploid = nrow(subclonallydiploid)
# genome length subclonally diploid
length.subclonallydiploid = sum(as.numeric(subclonallydiploid[,3])-as.numeric(subclonallydiploid[,2]))
# number of segments clonally tetraploid
clonallytetraploid = sub[which(sub[,8]==2 & sub[,9]==2 & sub[,10]==1),]
segments.clonallytetraploid = nrow(clonallytetraploid)
# genome length clonally tetraploid
length.clonallytetraploid = sum(as.numeric(clonallytetraploid[,3])-as.numeric(clonallytetraploid[,2]))
# number of segments subclonally tetraploid
subclonallytetraploid = sub[which((sub[,8]==2 & sub[,9]==2 & sub[,10]!=1) | sub[,11]==2 & sub[,12]==2),]
segments.subclonallytetraploid = nrow(subclonallytetraploid)
# genome length subclonally tetraploid
length.subclonallytetraploid = sum(as.numeric(subclonallytetraploid[,3])-as.numeric(subclonallytetraploid[,2]))
# number of segments with a clonal aberration
clonalaberration = sub[which(sub$frac1_A==1 & (sub$nMaj1_A!=1 | sub$nMin1_A!=1)),]
segments.clonalaberration = nrow(clonalaberration)
# length of genome with a clonal aberration
length.clonalaberration = sum(as.numeric(clonalaberration[,3]-clonalaberration[,2]))
# number of segments with a subclonal aberration (this just means any subclonality)
subclonalaberration = sub[which(sub$frac1_A!=1 & !is.na(sub$frac1_A)),]
segments.subclonalaberration = nrow(subclonalaberration)
# length of genome with a subclonal aberration
length.subclonalaberration = sum(as.numeric(subclonalaberration[,3]-subclonalaberration[,2]))
# number of segments with a clonal LOH
clonalLOH = sub[which(sub$frac1_A==1 & (sub$nMaj1_A==0 | sub$nMin1_A==0)),]
segments.clonalLOH = nrow(clonalLOH)
# length of genome with a clonal LOH
length.clonalLOH = sum(as.numeric(clonalLOH[,3]-clonalLOH[,2]))
# number of segments with a subclonal LOH
subclonalLOH = sub[which(sub$frac1_A!=1 & (sub$nMaj1_A==0 | sub$nMin1_A==0 | sub$nMaj2_A==0 | sub$nMin2_A==0)),]
segments.subclonalLOH = nrow(subclonalLOH)
# length of genome with a subclonal LOH
length.subclonalLOH = sum(as.numeric(subclonalLOH[,3]-subclonalLOH[,2]))
# number of clonal segments where either major or minor copy number is odd
clonalodd = sub[which(sub$frac1_A==1 & (sub$nMaj1_A%%2!=0 | sub$nMin1_A%%2!=0)),]
segments.clonalodd = nrow(clonalodd)
# length of clonal segments where either major or minor copy number is odd
length.clonalodd = sum(as.numeric(clonalodd[,3]-clonalodd[,2]))
# number of segments with 2 50% subclones (we are using 0.95 as our 1 for this dataset)
midpoint = (upperclonal+lowerclonal)/2/2
fiftylower = midpoint-0.025
fiftyupper = midpoint+0.025
fiftypcsubclones = sub[which(sub$frac1_A<fiftyupper & sub$frac1_A>fiftylower),]
segments.fiftypcsubclones = nrow(fiftypcsubclones)
# length of segments with 2 50% subclones (we are using 0.95 as our 1 for this dataset)
length.fiftypcsubclones = sum(as.numeric(fiftypcsubclones[,3]-fiftypcsubclones[,2]))
# number of segments that are clonally homdel
clonalhomdels = sub[which(sub$frac1_A==1 & sub$nMaj1_A==0 & sub$nMin1_A==0),]
segments.clonalhomdels = nrow(clonalhomdels)
# length clonal homdels
length.clonalhomdels = sum(as.numeric(clonalhomdels[,3]-clonalhomdels[,2]))
# max length clonal homdel
if(segments.clonalhomdels>0){
maxlength.clonalhomdels = max(clonalhomdels[,3]-clonalhomdels[,2])
} else {
maxlength.clonalhomdels =0
}
# number of segments that are subclonally homdel
subclonalhomdels = sub[which(sub$frac1_A!=1 & ((sub$nMaj1_A==0 & sub$nMin1_A==0) | (sub$nMaj2_A==0 & sub$nMin2_A==0))),]
segments.subclonalhomdels = nrow(subclonalhomdels)
# total length of subclonal homdels
length.subclonalhomdels = sum(as.numeric(subclonalhomdels[,3]-subclonalhomdels[,2]))
# max length subclonal homdel
if(segments.subclonalhomdels>0){
maxlength.subclonalhomdels = max(subclonalhomdels[,3]-subclonalhomdels[,2])
} else {
maxlength.subclonalhomdels = 0
}
bbmetrics = cbind(purity,ploidy,segments,genome.length,segments.clonallydiploid,length.clonallydiploid,
segments.subclonallydiploid,length.subclonallydiploid,
segments.clonallytetraploid,length.clonallytetraploid,
segments.subclonallytetraploid,length.subclonallytetraploid,
segments.clonalaberration,length.clonalaberration,
segments.subclonalaberration,length.subclonalaberration,
segments.clonalLOH,length.clonalLOH,
segments.subclonalLOH,length.subclonalLOH,
segments.clonalodd,length.clonalodd,
segments.fiftypcsubclones,length.fiftypcsubclones,
segments.clonalhomdels,length.clonalhomdels,maxlength.clonalhomdels,
segments.subclonalhomdels,length.subclonalhomdels,maxlength.subclonalhomdels)
} else {
bbmetricscolnames = c("purity","ploidy","segments","genome.length","segments.clonallydiploid","length.clonallydiploid",
"segments.subclonallydiploid","length.subclonallydiploid",
"segments.clonallytetraploid","length.clonallytetraploid",
"segments.subclonallytetraploid","length.subclonallytetraploid",
"segments.clonalaberration","length.clonalaberration",
"segments.subclonalaberration","length.subclonalaberration",
"segments.clonalLOH","length.clonalLOH",
"segments.subclonalLOH","length.subclonalLOH",
"segments.clonalodd","length.clonalodd",
"segments.fiftypcsubclones","length.fiftypcsubclones",
"segments.clonalhomdels","length.clonalhomdels","maxlength.clonalhomdels",
"segments.subclonalhomdels","length.subclonalhomdels","maxlength.subclonalhomdels")
bbmetrics =matrix(NA,1,30)
colnames(bbmetrics)=bbmetricscolnames
print(paste("No BB runs completed"))
}
# combine dpc and bb metrics for this sample
samplemetrics = as.data.frame(cbind(participantid,tumourplatekey,normalplatekey,bbmetrics,dpcmetrics))
# write metrics table for this call
write.csv(samplemetrics,paste0(sampledir,assessmentdir,tumourplatekey,"_metrics_run",run),
row.names = F,
quote = F)
# write a PASS or FAIL file dependent on DPClust output
if (samplemetrics$passeddpc=="Yes") {
write.csv("PASS",paste0(sampledir,assessmentdir,"PASS"),quote=F,row.names = F)
} else if (samplemetrics$passeddpc=="No") {
write.csv("FAIL",paste0(sampledir,assessmentdir,"FAIL"),quote=F,row.names = F)
}
print(paste("Written final metrics and indicator pass/fail file for",tumourplatekey))
return(samplemetrics)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.