doc/whole_exome_scans_with_ClusterBurden.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width=7
)

## -----------------------------------------------------------------------------
library(ClusterBurden)

n_random_genes = 200
disease = "cardiomyopathy"

genes = find_genes(n_random_genes, disease)

controls = collect_gnomad_controls(genenames=genes)

# control dataset
summary(controls)

cases = collect_example_cases(controls, disease)

# case dataset
summary(cases)

# disease genes
paste(cases[group=="cln", unique(symbol)], collapse=", ")


## -----------------------------------------------------------------------------
pvals = ClusterBurden_WES(cases, controls, covstats = T)
# BIN_test_WES can calculate only clustering p-values
# burden_WES can be used for only burden p-values 
# covstats=T provides more coverage details for each gene including the protein ranges excluded from the analysis
# Coverage data is automatically supplied with cases and controls generated by the collect_* functions

# "pvals" output 
head(pvals[order(ClusterBurden)])


## -----------------------------------------------------------------------------
manhattan(pvals, "BIN-test", n_genes=10, SCALE=0.5)
manhattan(pvals, "ClusterBurden", n_genes=10, SCALE=0.5)

## ----fig.height=6-------------------------------------------------------------
plot_signif_distribs(pvals, cases, controls, n_genes = 9, SCALE=0.5, test="BIN-test")

## ----fig.height=7-------------------------------------------------------------
# Top clustering signals 
pvals[order(BIN.test_pvalue)][,.(symbol, BIN.test_pvalue, pl_flag, cov_flag1, cov_flag2)][1:5]

# Choose a gene to investigate further
plot_features("MYH7", pvals, cases, controls, SCALE=0.7)
adamwaring/ClusterBurden documentation built on July 29, 2020, 9:50 p.m.