#Read Maf if not readed outside report
if (!("maf" %in% names(dataForReport))) {
  if ("vc_nonSyn" %in% names(dataForReport)) {
      dataForReport[["maf"]]=read.maf(maf =dataForReport[["mafFile"]],vc_nonSyn=dataForReport[["vc_nonSyn"]],clinicalData=dataForReport[["clinicalData"]],verbose=FALSE)
  } else {
      dataForReport[["maf"]]=read.maf(maf =dataForReport[["mafFile"]],clinicalData=dataForReport[["clinicalData"]],verbose=FALSE)
  }
}

#if (! "reportOut" %in% names(dataForReport)) {
#   dataForReport[["reportOut"]]=paste0(basename(dataForReport[["mafFile"]]),".report.html")
#}

#if (!("genome" %in% names(dataForReport))) {
#   #BSgenome.Hsapiens.UCSC.hg19
#   dataForReport[["genome"]]="hg19"
#}


#dndscv.refdb
if (!("dndscv.refdb" %in% names(dataForReport))) {
  dataForReport[["dndscv.refdb"]]=NULL
}
if (is.null(dataForReport[["dndscv.refdb"]])) {
    warning(paste0("genome defined but dndscv.refdb not defined or not existed. Can't do dndscv"))
} else {
  if (dataForReport[["dndscv.refdb"]]!="hg19" & !(file.exists(dataForReport[["dndscv.refdb"]]))) {
    dataForReport[["dndscv.refdb"]]=NULL
    warning(paste0("dndscv.refdb defined but not existed:",dataForReport[["dndscv.refdb"]],". Can't do dndscv"))
  }
}

#Clinic data
if (!is.null(dataForReport[["clinicalFeatures"]][1])) {
  dataForReport[["clinicalFeatures"]]=intersect(dataForReport[["clinicalFeatures"]],colnames(dataForReport[["maf"]]@clinical.data))
  if (length(dataForReport[["clinicalFeatures"]])>0) {
    setkey(dataForReport[["maf"]]@clinical.data,"Tumor_Sample_Barcode")
#    dataForReport[["maf"]]@data<-cbind(dataForReport[["maf"]]@data,dataForReport[["maf"]]@clinical.data[.(dataForReport[["maf"]]@data$Tumor_Sample_Barcode),])
#    dataForReport[["maf"]]@maf.silent<-cbind(dataForReport[["maf"]]@maf.silent,dataForReport[["maf"]]@clinical.data[.(dataForReport[["maf"]]@maf.silent$Tumor_Sample_Barcode),])
    dataForReport[["maf"]]@data<-merge(dataForReport[["maf"]]@data,dataForReport[["maf"]]@clinical.data,by="Tumor_Sample_Barcode",all.x = TRUE,sort=FALSE)
    dataForReport[["maf"]]@maf.silent<-merge(dataForReport[["maf"]]@maf.silent,dataForReport[["maf"]]@clinical.data,by="Tumor_Sample_Barcode",all.x = TRUE,sort=FALSE)

    if (is.null(dataForReport[["performGroupSummary"]])) {
      dataForReport[["performGroupSummary"]]=TRUE
    }
    if (dataForReport[["performGroupSummary"]]) {
      message(paste0("\nDefined clinicalFeatures, will report GroupSummary\n"))
      if (!("GroupSummary.Rmd" %in% dataForReport[["reportModules"]])) {
        dataForReport[["reportModules"]]=c(dataForReport[["reportModules"]],"GroupSummary.Rmd")
      }
    }
  } else {
    stop(paste0("clinicalFeatures defined but not found in column names of clinicalData"))
  }
}

#Interested Genes: If defined, overlap with mutation genes. If not, use top 10 genes
if ("interestedGenes" %in% names(dataForReport) & !is.null(dataForReport[["interestedGenes"]])) {
  dataForReport[["interestedGenes"]] = intersect(dataForReport[["interestedGenes"]],getGeneSummary(x = dataForReport[["maf"]])[, Hugo_Symbol])
  if (length(dataForReport[["interestedGenes"]])==0) {
      warning("No overlap between predefined interestedGenes and genes with mutation. Use top 10 genes as interestedGenes.")
      dataForReport[["interestedGenes"]] = getGeneSummary(x = dataForReport[["maf"]])[1:10, Hugo_Symbol]
  }
} else {
  dataForReport[["interestedGenes"]] = getGeneSummary(x = dataForReport[["maf"]])[1:10, Hugo_Symbol]
}


#Vaf perform analysis Y/N and caculation
if (!("performVaf" %in% names(dataForReport)) || dataForReport[["performVaf"]]) { #Not defined or dataForReport[["performVaf"]]==TRUE
  dataForReport[["performVaf"]]=TRUE
  caculateVaf=FALSE

  if ("vafCol" %in% names(dataForReport)) { #defined vafCol
    vafCol=intersect(dataForReport[["vafCol"]],getFields(dataForReport[["maf"]]))
     if (length(vafCol)==0) { #defined vafCol NOT in maf data column
       message(paste0("Defined vafCol as ",dataForReport[["vafCol"]]," but not found in maf." ))
       caculateVaf=TRUE
     } else {
       dataForReport[["vafCol"]]=vafCol[1]
       caculateVaf=FALSE
     }
  } else { #Not defined vafCol
      caculateVaf=TRUE
  }
  if (caculateVaf) {
    if (all(c("t_alt_count") %in% getFields(dataForReport[["maf"]])) & any(c("t_ref_count","t_depth") %in% getFields(dataForReport[["maf"]]))) {
      message("Cacualting Vaf based on Alt/Depth or Alt/(Ref+Alt) count and recording in t_vaf column")
      dataForReport[["vafCol"]]="t_vaf"

      if ("t_depth" %in% getFields(dataForReport[["maf"]])) { #use Alt/Depth
          dataForReport[["maf"]]@data[,t_vaf:=dataForReport[["maf"]]@data[,t_alt_count]/as.integer(dataForReport[["maf"]]@data[,t_depth])]
          dataForReport[["maf"]]@maf.silent[,t_vaf:=dataForReport[["maf"]]@maf.silent[,t_alt_count]/as.integer(dataForReport[["maf"]]@maf.silent[,t_depth])]
      } else { #use Alt/(Ref+Alt)
          dataForReport[["maf"]]@data[,t_vaf:=dataForReport[["maf"]]@data[,t_alt_count]/(dataForReport[["maf"]]@data[,t_alt_count]+dataForReport[["maf"]]@data[,t_ref_count])]
          dataForReport[["maf"]]@maf.silent[,t_vaf:=dataForReport[["maf"]]@maf.silent[,t_alt_count]/(dataForReport[["maf"]]@maf.silent[,t_alt_count]+dataForReport[["maf"]]@maf.silent[,t_ref_count])]
      }

    } else {
      warning(paste0("Try to cacualte Vaf based on Alt/Depth count"," but t_alt_count or t_depth or t_ref_count not found in maf." ))
      dataForReport[["performVaf"]]=FALSE
    }
  }
} else {  #dataForReport[["performVaf"]]==FALSE
  dataForReport[["performVaf"]]=FALSE
}


if (!("performAnalysis" %in% names(dataForReport))) {
  dataForReport[["performAnalysis"]] = TRUE
}

if (!("AACol" %in% names(dataForReport))) {
  preDefinedAACol=c("HGVSp_Short", "Protein_Change","aaChange", "AAChange")
  temp=intersect(preDefinedAACol,colnames(dataForReport[["maf"]]@data))
  if (length(temp)>0) {
    dataForReport[["AACol"]]=temp[1]
  } else {
    stop(paste0("Need provide AACol or maf file colnames should have one of ", paste0(preDefinedAACol,collapse=";")))
  }
}

if (!("vIdCol" %in% names(dataForReport))) {
  dataForReport[["vIdCol"]]=c("Hugo_Symbol",dataForReport[["AACol"]],"Chromosome","Start_Position","End_Position","Reference_Allele","Tumor_Seq_Allele2")
}

#summary variant
dataForReport[["mafProcessed"]]$variantCountSummary=summaryVariant(dataForReport[["maf"]],vIdCol=dataForReport[["vIdCol"]])
#summary variant for gene AA change level OncoPlot
dataForReport[["mafProcessed"]]$variantMafOncoPlot=prepareVariantOncoPlot(dataForReport[["maf"]],AACol=dataForReport[["AACol"]])


slzhao/mafreport documentation built on April 29, 2023, 8:39 a.m.