Analysis

Somatic Interactions

try(somaticInteractions(maf = dataForReport[["maf"]], top = 10,genes=dataForReport[["interestedGenes"]], pvalue = c(0.05, 0.1)))
if (dataForReport[["genome"]] %in% c("hg18","hg19","hg38")) {
  cat("## Detecting cancer driver genes based on positional clustering\n")
  mafOncodrive = try(oncodrive(maf = dataForReport[["maf"]], AACol = dataForReport[["AACol"]], minMut = 5, pvalMethod = 'zscore'))
}
if (class(mafOncodrive)[1]=="try-error") { #error in oncodrive, usually because of not enough genes with many mutations, can change minMut to smaller value.

} else {
  if (dataForReport[["genome"]] %in% c("hg18","hg19","hg38")) {
  makeDataTable(mafOncodrive[,c("Hugo_Symbol",setdiff(colnames(mafOncodrive),colnames(dataForReport[["maf"]]@gene.summary))),with = FALSE])

  fdrCut=max(sort(mafOncodrive$fdr)[5]+0.00000001,0.05) #make sure fdrCut can include at least 5 points for plot

  if (nrow(mafOncodrive)>200) {
    plotInd=1:200
    warning(paste0("Too many Oncodrive clusters: ",nrow(mafOncodrive),". Only top 200 were plotted. See table above for more details."))
    print(paste0("Too many Oncodrive clusters: ",nrow(mafOncodrive),". Only top 200 were plotted. See table above for more details."))
  } else {
    plotInd=1:nrow(mafOncodrive)
  }
  if (0) { #Too slow now. Not to do plot at this time
    plotOncodrive(res = mafOncodrive[plotInd,], fdrCutOff = fdrCut, useFraction = TRUE)
  }
  }
}
if (dataForReport[["genome"]] %in% c("hg18","hg19","hg38")) {
  cat("## Summarizing pfam domains\n")
  mafObj=dataForReport[["maf"]]
  #remove not Standard AaChange. Something like "wholegene" will report error. Add ; behind it to aviod it
  mafObj@data[[dataForReport[["AACol"]]]]=gsub("^[A-Za-z]+$","\\1;",dataForReport[["maf"]]@data[[dataForReport[["AACol"]]]])
  mafPfamDomains= pfamDomains(maf = mafObj, AACol = dataForReport[["AACol"]], top = 10)
}
if (dataForReport[["genome"]] %in% c("hg18","hg19","hg38")) {
  cat("## Drug-Gene Interactions\n")
  mafDrugInteractions = drugInteractions(maf = dataForReport[["maf"]], fontSize = 0.75)
}
if (dataForReport[["genome"]] %in% c("hg18","hg19","hg38")) {
  cat("## Oncogenic Signaling Pathways\n")
  mafOncogenicPathways = capture.output(OncogenicPathways(maf = dataForReport[["maf"]]))
#plot(mafOncogenicPathways)
  mafOncogenicPathways=fread(text=mafOncogenicPathways)
  if (nrow(mafOncogenicPathways)>0) {
    dataForPlot=head(mafOncogenicPathways,3)
    for (i in 1:nrow(dataForPlot)) {
      (PlotOncogenicPathways(maf = dataForReport[["maf"]], pathways = dataForPlot$Pathway[i]))
    }
  }

}
if (dataForReport[["performVaf"]]) {
  cat("## Tumor heterogeneity and MATH scores")
  mafInferHeterogeneity = inferHeterogeneity(maf = dataForReport[["maf"]], vafCol = dataForReport[["vafCol"]])

  #makeDataTable(mafInferHeterogeneity$clusterMeans)
  out=list(makeDataTable(mafInferHeterogeneity$clusterMeans))
  print(tagList(out))

  plotClusters(clusters = mafInferHeterogeneity)
}

Mutational Signatures

if (dataForReport[["genome"]]=="mm10") {
  genomePackage=paste0("BSgenome.Mmusculus.UCSC.",dataForReport[["genome"]])
} else {
  genomePackage=paste0("BSgenome.Hsapiens.UCSC.",dataForReport[["genome"]])
}

library(genomePackage, quietly = TRUE,character.only=TRUE)
#mafTnm = trinucleotideMatrix(maf = dataForReport[["maf"]], prefix = 'chr', add = TRUE, ref_genome =dataForReport[["genomePackage"]])

#check "chr" issue. Please note we changed seqlevelsStyle of maf file. May influence other analysis in future. So need to use another variable
genomePkgStype=suppressWarnings(GenomeInfoDb::seqlevelsStyle(BSgenome::getBSgenome(genome =genomePackage)))
mafObj=dataForReport[["maf"]]
mafObj@data$Chromosome=as.character(mafObj@data$Chromosome)
mafObj@maf.silent$Chromosome=as.character(mafObj@maf.silent$Chromosome)
GenomeInfoDb::seqlevelsStyle(mafObj@data$Chromosome)=genomePkgStype
GenomeInfoDb::seqlevelsStyle(mafObj@maf.silent$Chromosome)=genomePkgStype

mafTnm = trinucleotideMatrix(maf = mafObj,ref_genome =genomePackage)
try(plotApobecDiff(tnm = mafTnm, maf = mafObj))

library('NMF')
##old vertsion
#mafTnmSign=try(extractSignatures(mat = mafTnm, plotBestFitRes = FALSE))
#try(plotSignatures(mafTnmSign, title_size = 1 ,yaxisLim=NA))

#with R4.0
mafTnmSign = try(estimateSignatures(mat = mafTnm, nTry = 6))
#mafTnmSign = estimateSignatures(mat = mafTnm, nTry = 6, pConstant = 0.1, plotBestFitRes = FALSE, parallel = 2)
#plotCophenetic(res = mafTnmSign)
mafTnmSign = try(extractSignatures(mat = mafTnm, n = 5))

if (class(mafTnmSign)[1]!="try-error") {
  #Compate against original 30 signatures 
  mafTnmSign.og30.cosm = compareSignatures(nmfRes = mafTnmSign, sig_db = "legacy")
  #Compate against updated version3 60 signatures 
  mafTnmSign.v3.cosm = compareSignatures(nmfRes = mafTnmSign, sig_db = "SBS")

  library('pheatmap')
  pheatmap::pheatmap(mat = mafTnmSign.v3.cosm$cosine_similarities, cluster_rows = FALSE, main = "cosine similarity against validated signatures")

  ## ---- fig.width=6, fig.height=4, fig.align='center', eval = T-----------------
  maftools::plotSignatures(nmfRes = mafTnmSign, title_size = 1.2, sig_db = "SBS")

  #plot contribution to each sample
  library(reshape2)
  library(ggplot2)

  dataForPlot=reshape2::melt(mafTnmSign$contributions)
  colnames(dataForPlot)=c("Signature","Sample","Contribution")
  plot(ggplot(dataForPlot,aes(x=Sample,y=Contribution,fill=Signature))+geom_bar(stat = "identity")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))

}
if (!is.null(dataForReport[["dndscv.refdb"]])) {
  cat("## dndscv\n")

#  library(dndscv)
  mafDatSnp=rbind(dataForReport[["maf"]]@data,dataForReport[["maf"]]@maf.silent)
  #mafDatSnp=mafDatSnp[which(mafDatSnp$Variant_Type=="SNP"),]

  #remove In_Frame_Del or In_Frame_Ins in DnDs analysis if they were not considered as vc_nonSyn
  if (!("In_Frame_Del" %in% dataForReport$vc_nonSyn)) {
    mafDatSnp=mafDatSnp[which(! mafDatSnp$Variant_Classification %in% c("In_Frame_Del")),]
  }
  if (!("In_Frame_Ins" %in% dataForReport$vc_nonSyn)) {
    mafDatSnp=mafDatSnp[which(! mafDatSnp$Variant_Classification %in% c("In_Frame_Ins")),]
  }

  dataForModel=mafDatSnp[,.(Tumor_Sample_Barcode ,Chromosome,Start_Position,Reference_Allele,Tumor_Seq_Allele2)]
  dataForModel$Chromosome=as.character(dataForModel$Chromosome)

  #check chr style in data
  if (dataForReport[["dndscv.refdb"]] == "hg19") {
     data("refcds_hg19", package = "dndscv")
  } else {
     load(dataForReport[["dndscv.refdb"]])
  }
  genomePkgStype=GenomeInfoDb::seqlevelsStyle(sapply(RefCDS,function(x) x$chr))
  GenomeInfoDb::seqlevelsStyle(dataForModel$Chromosome)=genomePkgStype
  #colnames(dataForModel)<-c("sampleID","chr","pos","ref","alt")

  dndsout=try(dndscv::dndscv(dataForModel,cv=NULL,refdb=dataForReport[["dndscv.refdb"]],use_indel_sites=FALSE))
  #print(names(dndsout))

  if (class(dndsout)[1]!="try-error") {
    makeDataTable(dndsout$sel_cv[1:500,])
  }

#  kable(dndsout$globaldnds,row.names=FALSE)
}

if (!is.null(dataForReport[["dndscv.refdb"]])) {  #have to be in a seprate section to make sure makeDataTable(dndsout$sel_cv[1:500,]) can display
  if (class(dndsout)[1]!="try-error") {
    kable(dndsout$globaldnds,row.names=FALSE)
  }
}

sciClone

mafObj=dataForReport[["maf"]]
vafCol=dataForReport$vafCol
sciCloneResultDir=paste0(dataForReport$reportOutDir,"/sciClone/")
dir.create(sciCloneResultDir)

colToExtract=c("Chromosome","Start_Position","t_ref_count","t_alt_count",vafCol,"Tumor_Sample_Barcode")

##chr, pos, ref_reads, var_reads, vaf
mutationTableAll=rbind(mafObj@data[Variant_Type=="SNP"][,..colToExtract],mafObj@maf.silent[Variant_Type=="SNP"][,..colToExtract])
mutationTableAll$tumor_f=mutationTableAll$tumor_f*100   #VAF is 0 -100 in sciClone
mutationTableAll=as.data.frame(mutationTableAll)
mutationTableAll$Tumor_Sample_Barcode=as.character(mutationTableAll$Tumor_Sample_Barcode)


library(sciClone)

#allsamples run sciClone one by one
for (sampleOne in unique(mutationTableAll$Tumor_Sample_Barcode)) {
  vafInpuTable=mutationTableAll[which(mutationTableAll$Tumor_Sample_Barcode==sampleOne),head(colToExtract,5)]
  sc = try(sciClone(vafs=vafInpuTable,
              #minimumDepth=50,
              sampleNames=sampleOne,
              #cnCallsAreLog2=TRUE,
              maximumClusters=6))
  if (is.null(sc)) { #error
    next
  }
  sciCloneOutput=paste0(sciCloneResultDir,sampleOne,".sciClone")

  writeClusterTable(sc, paste0(sciCloneOutput,".txt"))
  sc.plot1d(sc,paste0(sciCloneOutput,".clusterVis1D.pdf"))
}


#summary results
sciCloneResultFiles=list.files(sciCloneResultDir,pattern=".sciClone.txt$")
sciCloneResultSummary=NULL
for (sciCloneResultFile in sciCloneResultFiles) {
  sampleName=gsub(".sciClone.txt$","",sciCloneResultFile)
  sciCloneResultTable=read.delim(paste0(sciCloneResultDir,sciCloneResultFile),header=T,as.is=T)

  totalMutationN=nrow(sciCloneResultTable)
  totalClusterN=max(sciCloneResultTable$cluster,na.rm = TRUE)
  totalMutationNInCluster=table(sciCloneResultTable$cluster)
  totalMutationNInCluster=totalMutationNInCluster[which(names(totalMutationNInCluster)!=0)] #cluster 0 is outlier
  totalMutationNInCluster=paste(totalMutationNInCluster,collapse=";")

  sciCloneResultSummary=rbind(sciCloneResultSummary,c(sampleName,totalMutationN,totalClusterN,totalMutationNInCluster))
}
colnames(sciCloneResultSummary)=c("Sample","TotalVariants","TotalCluster","VariantsInEachCluster")

write.csv(sciCloneResultSummary, paste0(sciCloneResultDir,"sciCloneResultSummary.csv"),row.names=FALSE)


slzhao/mafreport documentation built on April 29, 2023, 8:39 a.m.