Group/Family summary

clinicalData=getClinicalData(dataForReport$maf)
makeDataTable(clinicalData)

Top 200 variants/genes will be listed in the tables:

if (!is.null(dataForReport[["clinicalFeatures"]][1])) {
  cat("## Variants in Family/Group Summary\n")

  maf=dataForReport[["maf"]]@data
  out<-list()

  for (clinicalFeaturesOne in dataForReport[["clinicalFeatures"]]) {
    #This is from clinic information data, too large sometimes, not using this way.
    #clinicalFeaturesOneSize=table(dataForReport[["maf"]]@clinical.data[,..clinicalFeaturesOne])
    selectedVars=c("Tumor_Sample_Barcode",clinicalFeaturesOne)
    temp=unique(maf[,..selectedVars])
    #temp=unique(maf[,.(Tumor_Sample_Barcode,get(clinicalFeaturesOne))])
    clinicalFeaturesOneSize=table(temp[,2])
    if (any(clinicalFeaturesOneSize<2)) {
      maf[[clinicalFeaturesOne]][which(maf[[clinicalFeaturesOne]] %in% names(which(clinicalFeaturesOneSize<2)))]=NA
    }

    #N
    #vt = maf[,.N, .(Hugo_Symbol,HGVSp_Short, family )]
#    cat(paste0("### ",clinicalFeaturesOne,"\n"))
    out<-append(out,list(tags[["h3"]](paste0(clinicalFeaturesOne))))
    vt = maf[,.N, c(dataForReport[["vIdCol"]],clinicalFeaturesOne)]
    castFormula=as.formula(paste0(paste(dataForReport[["vIdCol"]],collapse="+"),"~",clinicalFeaturesOne))
    vt.cast = data.table::dcast(data = vt, formula = castFormula, value.var = 'N', fill = 0)
#    vt.cast = vt.cast[,total:=rowSums(vt.cast[,(length(dataForReport[["vIdCol"]])+1):ncol(vt.cast), with = FALSE])][order(total, decreasing = TRUE)]
    vt.cast = vt.cast[,Occur:=apply(vt.cast[,(length(dataForReport[["vIdCol"]])+1):ncol(vt.cast), with = FALSE],1,function(x) length(which(x>0)))][order(Occur, decreasing = TRUE)]

    #add group size to group name
    temp=intersect(colnames(vt.cast),names(clinicalFeaturesOneSize))
    if (length(temp)>0) {
      nameInd=which(colnames(vt.cast) %in% temp)
      colnames(vt.cast)[nameInd]=paste0(colnames(vt.cast)[nameInd]," (",clinicalFeaturesOneSize[colnames(vt.cast)[nameInd]],")")
    }
    #Comment this as you don't know how many patients don't have family information
    # if ("NA" %in% colnames(vt.cast)) {
    #   colnames(vt.cast)[which(colnames(vt.cast)=="NA")]=paste0("Other or No Family (",sum(clinicalFeaturesOneSize)-sum(clinicalFeaturesOneSize[temp]),")")
    # }

    out<-append(out,list(makeDataTable(vt.cast[1:200,])))

    #Percent
#    temp=unique(maf[,.(Tumor_Sample_Barcode,get(clinicalFeaturesOne))])
#    clinicalFeaturesOneSize=table(temp[,2])
    vt$N.Percent=round(vt$N/clinicalFeaturesOneSize[vt[,get(clinicalFeaturesOne)]],2)
    vtCastPercent = data.table::dcast(data = vt, formula = castFormula, value.var = 'N.Percent', fill = 0)
    vtCastPercent = vtCastPercent[,OccurInAll:=apply(vtCastPercent[,(length(dataForReport[["vIdCol"]])+1):ncol(vtCastPercent), with = FALSE],1,function(x) length(which(x==1)))][order(OccurInAll, decreasing = TRUE)]

    #add group size to group name
    temp=intersect(colnames(vtCastPercent),names(clinicalFeaturesOneSize))
    if (length(temp)>0) {
      nameInd=which(colnames(vtCastPercent) %in% temp)
      colnames(vtCastPercent)[nameInd]=paste0(colnames(vtCastPercent)[nameInd]," (",clinicalFeaturesOneSize[colnames(vtCastPercent)[nameInd]],")")
    }
    out<-append(out,list(makeDataTable(vtCastPercent[1:200,])))

  }
  tagList(out)
}
cat("\n")

Oncoplots about variants, sort by Family/Group

for (clinicalFeaturesOne in dataForReport[["clinicalFeatures"]]) {
  clinicalFeaturesOneGroups=table((dataForReport[["mafProcessed"]]$variantMafOncoPlot@data[,..clinicalFeaturesOne]))
  clinicalFeaturesOneSize=length(which(clinicalFeaturesOneGroups>0))
  if (clinicalFeaturesOneSize==0) {
    print(paste0("clinicalFeature ",clinicalFeaturesOne," has too few unique groups: ",clinicalFeaturesOneSize, ". Skip Oncoplots"))
  }
  if (clinicalFeaturesOneSize<=99) {
    plot.new()
    oncoplot(maf = dataForReport[["mafProcessed"]]$variantMafOncoPlot, top = 10,removeNonMutated=FALSE,showTumorSampleBarcodes=TRUE,clinicalFeatures=clinicalFeaturesOne,sortByAnnotation=TRUE,barcode_mar=9,gene_mar=11)
  } else {
    print(paste0("clinicalFeature ",clinicalFeaturesOne," has too many groups: ",clinicalFeaturesOneSize, ". Reduce it to less than 99 for Oncoplots"))
  }

}
if (!is.null(dataForReport[["clinicalFeatures"]][1])) {
  cat("## Genes in Family/Group Summary\n")

  maf=dataForReport[["maf"]]@data
  out<-list()

  for (clinicalFeaturesOne in dataForReport[["clinicalFeatures"]]) {
    out<-append(out,list(tags[["h3"]](paste0(clinicalFeaturesOne))))

    temp=unique(maf[,.(Tumor_Sample_Barcode,get(clinicalFeaturesOne))])
    clinicalFeaturesOneSize=table(temp[,2])
    if (any(clinicalFeaturesOneSize<2)) {
      maf[[clinicalFeaturesOne]][which(maf[[clinicalFeaturesOne]] %in% names(which(clinicalFeaturesOneSize<2)))]=NA
    }

    #N
    #sometimes more than one variant on same gene on the patient, need to remove
    temp=c("Hugo_Symbol","Tumor_Sample_Barcode",clinicalFeaturesOne)
    vt=unique(maf[,..temp])
    vt = vt[,.N, c("Hugo_Symbol",clinicalFeaturesOne)]
    castFormula=as.formula(paste0("Hugo_Symbol","~",clinicalFeaturesOne))
    vt.cast = data.table::dcast(data = vt, formula = castFormula, value.var = 'N', fill = 0)
    vt.cast = vt.cast[,Occur:=apply(vt.cast[,2:ncol(vt.cast), with = FALSE],1,function(x) length(which(x>0)))][order(Occur, decreasing = TRUE)]
    #add group size to group name
    temp=intersect(colnames(vt.cast),names(clinicalFeaturesOneSize))
    if (length(temp)>0) {
      nameInd=which(colnames(vt.cast) %in% temp)
      colnames(vt.cast)[nameInd]=paste0(colnames(vt.cast)[nameInd]," (",clinicalFeaturesOneSize[colnames(vt.cast)[nameInd]],")")
    }
    out<-append(out,list(makeDataTable(vt.cast[1:200,])))

    #Percent
#    temp=unique(maf[,.(Tumor_Sample_Barcode,get(clinicalFeaturesOne))])
#    clinicalFeaturesOneSize=table(temp[,2])
    vt$N.Percent=round(vt$N/clinicalFeaturesOneSize[vt[,get(clinicalFeaturesOne)]],2)
    vtCastPercent = data.table::dcast(data = vt, formula = castFormula, value.var = 'N.Percent', fill = 0)
    vtCastPercent = vtCastPercent[,OccurInAll:=apply(vtCastPercent[,2:ncol(vtCastPercent), with = FALSE],1,function(x) length(which(x==1)))][order(OccurInAll, decreasing = TRUE)]
    #add group size to group name
    temp=intersect(colnames(vtCastPercent),names(clinicalFeaturesOneSize))
    if (length(temp)>0) {
      nameInd=which(colnames(vtCastPercent) %in% temp)
      colnames(vtCastPercent)[nameInd]=paste0(colnames(vtCastPercent)[nameInd]," (",clinicalFeaturesOneSize[colnames(vtCastPercent)[nameInd]],")")
    }

    out<-append(out,list(makeDataTable(vtCastPercent[1:200,])))

  }
  tagList(out)
}

Oncoplots about Genes, sort by Family/Group

for (clinicalFeaturesOne in dataForReport[["clinicalFeatures"]]) {
  clinicalFeaturesOneSize=length(table((dataForReport[["maf"]]@data[,..clinicalFeaturesOne])))
  if (clinicalFeaturesOneSize<=99) {
  plot.new()
  oncoplot(maf = dataForReport[["maf"]], top = 10,removeNonMutated=FALSE,showTumorSampleBarcodes=TRUE,clinicalFeatures=clinicalFeaturesOne,sortByAnnotation=TRUE,barcode_mar=9,gene_mar=9)
  } else {
    print(paste0("clinicalFeature ",clinicalFeaturesOne," has too many groups: ",clinicalFeaturesOneSize, ". Reduce it to less than 99 for Oncoplots"))
  }

}

Mutation burden by Family/Group

#test
testEachGroup=function(x,groups,test=wilcox.test) {
  uniqueGroups=unique(na.omit(groups))

  resultAll=NULL
  for (i in 1:(length(uniqueGroups)-1)) {
    for (j in (i+1):length(uniqueGroups)) {
      group1=uniqueGroups[i]
      group2=uniqueGroups[j]
#      print(paste0(group1, " vs ",group2))
#      print(x[which(groups==group1)])
#      print(x[which(groups==group2)])
      testP=test(x[which(groups==group1)],x[which(groups==group2)])$p.value
      resultAll=rbind(resultAll,c(group1,group2,round(testP,4)))
    }
  }
  colnames(resultAll)=c("Group1","Group2","Test P value")
  return(resultAll)
}


for (clinicalFeaturesOne in dataForReport[["clinicalFeatures"]]) {
  clinicalFeaturesOneSize=length(table((dataForReport[["maf"]]@data[,..clinicalFeaturesOne])))
  if (clinicalFeaturesOneSize<=6) { #maxmial 6 groups
    #mutationTypePerSampleCount=cbind(dataForReport[["maf"]]@variant.classification.summary,dataForReport[["maf"]]@variant.type.summary)
    mutationTypePerSampleCount=dataForReport[["maf"]]@variant.type.summary
    dataForPlot=cbind(mutationTypePerSampleCount,dataForReport[["maf"]]@clinical.data[as.character(mutationTypePerSampleCount$Tumor_Sample_Barcode),][[clinicalFeaturesOne]])
    names(dataForPlot)[ncol(dataForPlot)]=clinicalFeaturesOne

    for (varType in names(dataForPlot)[-c(1,ncol(dataForPlot))]) {
        #varType="total"
        p=ggplot(dataForPlot)+geom_boxplot(aes_string(x=clinicalFeaturesOne,y=varType))+ggtitle(varType)
        plot(p)
        result=testEachGroup(dataForPlot[[varType]],as.character(dataForPlot[[clinicalFeaturesOne]]),test=wilcox.test)
        print(knitr::kable(result,caption = paste0("Number of ",varType," mutations Comparison by Wilcox Rank Sum test")))
        cat("\n\n")
    }
  }

}


slzhao/mafreport documentation built on April 29, 2023, 8:39 a.m.