knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=7 )
library(ClusterBurden) controls = collect_gnomad_controls(dataset="exome") cases = collect_gnomad_controls(dataset="genome") cases[,aff:=1] attributes(controls)$provenance = attributes(cases)$provenance = NULL binpval = BIN_test_WES(cases, controls) head(binpval)
# nominal significance binpval[,sum(BIN.test_pvalue < 0.05, na.rm=T)/.N] # Bonferonni significance binpval[,sum(BIN.test_pvalue < 0.05/.N)/.N]
# function to generate lambda inflation metric lambda = function(p) median(qchisq(p, df=1, lower.tail=FALSE)) / qchisq(0.5, 1) # lambda across all non-NA p-values lambda(binpval[!is.na(BIN.test_pvalue), BIN.test_pvalue]) # lambda across p-values not in giant proteins i.e. > 2000 residues lambda(binpval[!is.na(BIN.test_pvalue) & !grepl("high", pl_flag), BIN.test_pvalue])
manhattan(binpval, "bin-test", 10, SCALE=0.5)
attributes(controls)$provenance = attributes(cases)$provenance = "auto" binpval = BIN_test_WES(cases, controls) binpval
It is clear that coverage differences was the main driver of significant results. Lambda is now reduced to become below 1 however nominally significant results do exceed 5%. The manhattan plot reveals some Bonferonni significant results indicating either some uncontrolled for confounders (e.g. ancestry) or true differences between the exomes and genomes cohorts (unlikely but not impossible).
# nominal significance binpval[,sum(BIN.test_pvalue < 0.05, na.rm=T)/.N] # Bonferonni significance binpval[,sum(BIN.test_pvalue < 0.05/.N, na.rm=T)/.N] # function to generate lambda inflation metric lambda = function(p) median(qchisq(p, df=1, lower.tail=FALSE)) / qchisq(0.5, 1) # lambda across all non-NA p-values lambda(binpval[!is.na(BIN.test_pvalue), BIN.test_pvalue]) # manhattan manhattan(binpval, "bin-test", 10, SCALE=0.5)
Alongside the p-values there are flags to alert the user to potential confounders including pl_flag for protein length and cov_flag1 and cov_flag2 for coverage. After removing genes with coverage concern level flagged as high (or very high) the same calculations are made.
binpval = binpval[!grepl("high", cov_flag1) & !grepl("high", cov_flag2) & !grepl("high", pl_flag)] # nominal significance binpval[,sum(BIN.test_pvalue < 0.05, na.rm=T)/.N] # Bonferonni significance binpval[,sum(BIN.test_pvalue < 0.05/.N, na.rm=T)/.N] # function to generate lambda inflation metric lambda = function(p) median(qchisq(p, df=1, lower.tail=FALSE)) / qchisq(0.5, 1) # lambda across all non-NA p-values lambda(binpval[!is.na(BIN.test_pvalue), BIN.test_pvalue]) # manhattan manhattan(binpval, "bin-test", 10, SCALE=0.5)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.