library(knitr)
opts_chunk$set(cache=FALSE)

#source("/home/zhaos/source/r_cqs/myPkg/R/htmlTable.R")

#packages
library(ggplot2)
library(formattable)
library(htmltools)

#functions used in the report
plotVar<-function(rawTable,varName="Call.rate",pattern=NULL,meltTable=TRUE,reportRace=FALSE) {
  if (is.null(pattern)) {
    pattern=paste0("^Race\\.\\w+\\.",varName)
  }
  if (reportRace) {
    if (meltTable) {
      selectedCol<-grep(pattern,colnames(rawTable))
      dataForPlot<-melt(rawTable[,..selectedCol],variable.name="Race",value.name=varName,measure=1:length(selectedCol))
    } else {
      dataForPlot<-rawTable
    }
    dataForPlot=subset(dataForPlot,!is.na(Race))
  } else {
    dataForPlot=rawTable
  }
  if (class(dataForPlot)=="DFrame" | class(dataForPlot)=="DataFrame") {
    dataForPlot=as.data.frame(dataForPlot)
  }
  p<-ggplot(dataForPlot, aes_string(varName)) +geom_histogram()
  if (reportRace) {
    p=p+facet_wrap(~Race,scales = "free")  
  }
  print(p)
}

Result Summary

if (!is.null(dataForReport$SummaryTable)) {
  makeFormatTable(dataForReport$SummaryTable)
}

Overall Call Rate Quality

Sample Call Rate

#makeFormatTable(head(sampleSummary))
dataForTable<-makeSummaryTable(dataForReport$SampleSummary$Call.rate,valueCut=c(0.9,0.95,0.96,0.97,0.98,0.99,0.995,0.999))
makeFormatTable(dataForTable,percentColInd=3)
plotVar(dataForReport$SampleSummary,varName="Call.rate",meltTable=FALSE,reportRace=FALSE)

SNP Call Rate

#makeFormatTable(head(snpsum))
dataForTable<-makeSummaryTable(dataForReport$SnpSummary$Call.rate,valueCut=c(0.9,0.95,0.96,0.97,0.98,0.99))
makeFormatTable(dataForTable,percentColInd=3)
plotVar(dataForReport$SnpSummary,varName="Call.rate",reportRace=FALSE)

MAF

#makeFormatTable(head(snpsum))
dataForTable<-makeSummaryTable(dataForReport$SnpSummary$MAF,valueCut=c(0.0001,0.0005,0.001,0.005,0.01,0.05))
makeFormatTable(dataForTable,percentColInd=3)
plotVar(dataForReport$SnpSummary,varName="MAF")

MAF Consistency with 1000 Genome

#if (exists("G1000MafFile") && !is.null(G1000MafFile)) {
  snpSummaryOutColNames1<-c("chromosome","position","snp.name","allele.1","allele.2")
#  snpSummaryOutColNames3<-c("G1000.id","G1000.ref","G1000.alt")

  temp1<-dataForReport$SnpSummary[["AF"]]
  temp2<-dataForReport$SnpSummary[["MAF"]]
  plot(temp1,temp2,pch=16,cex=0.3,xlab="MAF in 1000 Genomes",ylab="MAF in this data",main="Overall MAF")
  mafOutlierDiff<-temp2-temp1
  mafOutlierInd<-which(abs(mafOutlierDiff)>=dataForReport$MafOutlierCut)
  if (length(mafOutlierInd)>0) { #Outlier MAF
    mafOutlierDiff<-mafOutlierDiff[mafOutlierInd]

    snpSummaryOutColNames2<-c("MAF")
    snpSummaryOutColNames4<-c("AF")
#    snpSummaryOutColNames<-c(snpSummaryOutColNames1,snpSummaryOutColNames2,snpSummaryOutColNames3,snpSummaryOutColNames4)
    snpSummaryOutColNames<-c(snpSummaryOutColNames1,snpSummaryOutColNames2,snpSummaryOutColNames4)

    mafOutlierTable<-dataForReport$SnpSummary[mafOutlierInd,..snpSummaryOutColNames]
    mafOutlierTable$Diff=mafOutlierDiff
    #setorder(mafOutlierTable, -Diff)
    mafOutlierTable=mafOutlierTable[rev(order(abs(mafOutlierDiff))),]

    cat("\n\n### MAF Outliers with 1000 Genome\n")
    out=list(makeFormatTable(as.data.frame(mafOutlierTable),makeIntoDT=TRUE))
    print(tagList(out))
  }

  if (!is.null(dataForReport$ReportRace) && dataForReport$ReportRace) {
    cat("\n\n### MAF Consistency with 1000 Genome by Race\n")
    for (i in 1:nrow(dataForReport$MafReportPairs)) {
      temp1<-dataForReport$SnpSummary[[dataForReport$MafReportPairs[i,1]]]
      temp2<-dataForReport$SnpSummary[[dataForReport$MafReportPairs[i,2]]]
      plot(temp1,temp2,pch=16,cex=0.3,xlab="MAF in 1000 Genomes",ylab="MAF in this data",main=paste0(dataForReport$MafReportPairs[i,2]))

      mafOutlierDiff<-temp2-temp1
      mafOutlierInd<-which(abs(mafOutlierDiff)>=dataForReport$MafOutlierCut)
      if (length(mafOutlierInd)>0) { #Outlier MAF
          mafOutlierDiff<-mafOutlierDiff[mafOutlierInd]
          snpSummaryOutColNames2<-dataForReport$MafReportPairs[i,2]
          snpSummaryOutColNames4<-dataForReport$MafReportPairs[i,1]
          #snpSummaryOutColNames<-c(snpSummaryOutColNames1,snpSummaryOutColNames2,snpSummaryOutColNames3,snpSummaryOutColNames4)
          snpSummaryOutColNames<-c(snpSummaryOutColNames1,snpSummaryOutColNames2,snpSummaryOutColNames4)

          mafOutlierTable<-dataForReport$SnpSummary[mafOutlierInd,..snpSummaryOutColNames]
          mafOutlierTable$Diff=mafOutlierDiff
          #setorder(mafOutlierTable, -Diff)
          mafOutlierTable=mafOutlierTable[rev(order(abs(mafOutlierDiff))),]

          out=list(makeFormatTable(as.data.frame(mafOutlierTable),makeIntoDT=TRUE))
          print(tagList(out))
      }

    }
  }
#}

Consistency

Duplicate SNPs

SNP level

if (!is.null(dataForReport$DuplicateSnpsConcordance$SnpLevel)) {
  makeFormatTable(dataForReport$DuplicateSnpsConcordance$SnpLevel,makeIntoDT=TRUE)
}

Sample Level

if (!is.null(dataForReport$DuplicateSnpsConcordance$SampleLevel)) {
  makeFormatTable(dataForReport$DuplicateSnpsConcordance$SampleLevel,makeIntoDT=TRUE)
}

Duplicate Samples

SNP level

if (!is.null(dataForReport$DuplicateSamplesConcordance$SnpLevel)) {
  makeFormatTable(dataForReport$DuplicateSamplesConcordance$SnpLevel,makeIntoDT=TRUE)
}

Sample Level

if (!is.null(dataForReport$DuplicateSamplesConcordance$SampleLevel)) {
  makeFormatTable(dataForReport$DuplicateSamplesConcordance$SampleLevel,makeIntoDT=TRUE)
}

Samples in 1000 Genome

SNP level

if (!is.null(dataForReport$G1000SamplesConcordance$SnpLevel)) {
  makeFormatTable(dataForReport$G1000SamplesConcordance$SnpLevel,makeIntoDT=TRUE)
}

Sample Level

if (!is.null(dataForReport$G1000SamplesConcordance$SampleLevel)) {
  makeFormatTable(dataForReport$G1000SamplesConcordance$SampleLevel,makeIntoDT=TRUE)
}

Gender Check

if (!is.null(dataForReport$GenderCheckTable)) {
  makeFormatTable(dataForReport$GenderCheckTable,makeIntoDT=TRUE)
}

HWE

plotVar(dataForReport$SnpSummary,varName="z.HWE")
dataForTable<-makeSummaryTable(dataForReport$SnpSummary$z.HWE,valueCut=c(0.0001,0.0005,0.001,0.005,0.01,0.05))
makeFormatTable(dataForTable,percentColInd=3)
#hwePCol<-grep("\\.\\p\\.HWE$",colnames(dataForReport$SnpSummary))
#if (length(hwePCol)>0) {
#  dataForTable<-makeSummaryTable(dataForReport$SnpSummary[,..hwePCol],valueCut=c(0.0001,0.0005,0.001,0.005,0.01,0.05))
#  makeFormatTable(dataForTable,percentColInd=3)
#}

Ratios

if (!is.null(dataForReport$SampleRatio) & !is.null(dataForReport$SampleSummary$Race)) {
    dataForPlot<-data.frame(dataForReport$SampleRatio,Race=dataForReport$SampleSummary$Race)
    p<-ggplot(dataForPlot,aes(x=Race,y=ratio01By11))+geom_boxplot()
    print(p)
}

AIM PCA

if (!is.null(dataForReport$AimPcaResult)) {
  if (!is.null(dataForReport$ReportRace) && dataForReport$ReportRace) {
    dataForPlot<-data.frame(dataForReport$AimPcaResult,Race=dataForReport$SampleSummary$Race)
    p<-ggplot(dataForPlot,aes(x=PC1,y=PC2))+geom_point(aes(colour=Race))
  } else { #no race information
    dataForPlot<-data.frame(dataForReport$AimPcaResult)
    p<-ggplot(dataForPlot,aes(x=PC1,y=PC2))+geom_point()
  }
    print(p)
}


slzhao/GTQC documentation built on April 7, 2021, 10:23 p.m.