# unit testing for the recessiveStats functions
library(recessiveStats)
library(testthat)
context("Recessive statistic checks")
test_that("analyse_inherited_enrichment output is correct", {
vars = read.table(header = TRUE, text = "
CHROM POS REF ALT AC AN CQ
1 1 A G 1 1000 missense_variant
1 2 G C 1 1000 stop_gained
1 3 T A 1 1000 stop_lost
1 4 G T 1 1000 synonymous_variant
")
counts = list("biallelic_lof"=1, 'biallelic_func'=4, 'lof_func'=2, "biallelic_syn"=1)
cohort_n = 1000
result = list(lof=0.001, functional=0.001999, synonymous=0.001,
biallelic_lof_p=0.000999500666126,
lof_func_p=2.061908282e-08,
biallelic_func_p=1.056905e-11,
biallelic_syn_p=0.00099950067,
all_p=9.191389e-19)
expect_equal(analyse_inherited_enrichment(counts, vars, cohort_n), result)
})
test_that("analyse_inherited_enrichment output is correct for multi-population", {
vars1 = read.table(header = TRUE, text = "
CHROM POS REF ALT AC AN CQ
1 1 A G 1 1000 missense_variant
1 2 G C 1 1000 stop_gained
")
vars2 = read.table(header = TRUE, text = "
CHROM POS REF ALT AC AN CQ
1 1 A G 1 1000 missense_variant
1 2 G C 1 1000 stop_gained
")
vars = list("first"=vars1, "second"=vars2)
counts = list("biallelic_lof"=1, 'biallelic_func'=4, 'lof_func'=2,
'biallelic_syn'=1)
cohort_n = list("first"=500, "second"=500)
result = list(lof=NA, functional=NA, synonymous=NA,
biallelic_lof_p=0.000999500666126,
lof_func_p=4.467516e-09,
biallelic_func_p=4.138414e-14,
biallelic_syn_p=0.000995516608,
all_p=3.16097e-21)
expect_equal(analyse_inherited_enrichment(counts, vars, cohort_n), result)
})
test_that("analyse_inherited_enrichment output can use different frequency threshold", {
vars = read.table(header = TRUE, text = "
CHROM POS REF ALT AC AN CQ
1 1 A G 1 1000 missense_variant
1 2 G C 1 1000 stop_gained
1 3 T A 8 1000 stop_gained
1 4 G T 1 1000 synonymous_variant
")
counts = list("biallelic_lof"=1, 'biallelic_func'=4, 'lof_func'=2, 'biallelic_syn'=1)
cohort_n = list("first"=500, "second"=500)
result = list(lof=0.008992, functional=0.001, synonymous=0.001,
biallelic_lof_p=0.07767657724,
lof_func_p=0.0001483348721,
biallelic_func_p=4.138413726e-14,
biallelic_syn_p=0.000999500666,
all_p=1.761373e-11)
expect_equal(analyse_inherited_enrichment(counts, vars, cohort_n,
threshold=0.01), result)
# now check when we change the frequency threshold
result = list(lof=0.001, functional=0.001, synonymous=0.001,
biallelic_lof_p=0.0009995006661,
lof_func_p=4.467516392e-09,
biallelic_func_p=4.138413726e-14,
biallelic_syn_p=0.000999500666,
all_p=3.16097e-21)
expect_equal(analyse_inherited_enrichment(counts, vars, cohort_n,
threshold=0.005), result)
})
test_that("analyse_inherited_enrichment output raises an error if the cohort
populations aren't in the variant dataset", {
vars1 = read.table(header = TRUE, text = "
CHROM POS REF ALT AC AN CQ
1 1 A G 1 1000 missense_variant
1 2 G C 1 1000 stop_gained
")
vars2 = read.table(header = TRUE, text = "
CHROM POS REF ALT AC AN CQ
1 1 A G 1 1000 missense_variant
1 2 G C 1 1000 stop_gained
")
vars = list("first"=vars1, "second"=vars2)
counts = list("biallelic_lof"=1, 'biallelic_func'=4, 'lof_func'=2, 'biallelic_syn'=1)
cohort_n = list("first"=500, "third"=500)
expect_error(analyse_inherited_enrichment(counts, vars, cohort_n))
})
test_that("enrichment_single_population output is correct", {
freq = list(lof=0.001, functional=0.1, synonymous=0.1)
counts = list("biallelic_lof"=1, 'biallelic_func'=4, 'lof_func'=2, 'biallelic_syn'=1)
result = list(lof=0.001, functional=0.1, synonymous=0.1,
biallelic_lof_p=0.000999500666126,
lof_func_p=0.001158649871933,
biallelic_func_p=0.989927345227986,
biallelic_syn_p=0.999956829,
all_p=0.883268773181358)
expect_equal(enrichment_single_population(freq, counts, 1000), result)
})
test_that("enrichment_single_population output is correct for NA input", {
freq = list(lof=NA, functional=NA, synonymous=NA)
counts = list("biallelic_lof"=1, 'biallelic_func'=4, 'lof_func'=2, 'biallelic_syn'=1)
result = list(lof=NA, functional=NA, synonymous=NA,
biallelic_lof_p=as.numeric(NA),
lof_func_p=as.numeric(NA),
biallelic_func_p=as.numeric(NA),
biallelic_syn_p=as.numeric(NA),
all_p=as.numeric(NA))
expect_identical(enrichment_single_population(freq, counts, 1000), result)
})
test_that("enrichment_single_population output is correct when correcting for autozygosity", {
freq = list(lof=0.001, functional=0.1, synonymous=0.1)
counts = list("biallelic_lof"=1, 'biallelic_func'=4, 'lof_func'=2, 'biallelic_syn'=1)
result = list(lof=0.001, functional=0.1, synonymous=0.1,
biallelic_lof_p=0.0019970053242,
lof_func_p=0.0011751598957,
biallelic_func_p=0.99057656658,
biallelic_syn_p=0.99996058,
all_p=0.888449945683488)
expect_equal(enrichment_single_population(freq, counts, 1000, autozygosity=0.001), result)
# also try the test when we explicitly state the autozygous rate is zero
result = list(lof=0.001, functional=0.1, synonymous=0.1,
biallelic_lof_p=0.000999500666126,
lof_func_p=0.001158649871933,
biallelic_func_p=0.989927345227986,
biallelic_syn_p=0.999956829,
all_p=0.883268773181358)
expect_equal(enrichment_single_population(freq, counts, 1000, autozygosity=0), result)
})
test_that("enrichment_single_population output is correct when missing count data", {
freq = list(lof=0.001, functional=0.1, synonymous=0.1)
counts = list("biallelic_lof"=1, 'lof_func'=2)
result = list(lof=0.001, functional=0.1, synonymous=0.1,
biallelic_lof_p=0.0009995006661,
lof_func_p=0.001158649872,
biallelic_func_p=numeric(0),
biallelic_syn_p=numeric(0),
all_p=numeric(0))
expect_equal(enrichment_single_population(freq, counts, 1000), result)
})
context("Population combination generation checks")
test_that("get_count_combinations is correct", {
populations = c("A", "B")
count = 2
result = read.table(header=TRUE, text="
A B
2 0
1 1
0 2",
colClasses=c("integer", "integer"))
expect_equal(get_count_combinations(populations, count), result)
expect_error(get_count_combinations(populations, -1), "count >= 0 is not TRUE")
})
test_that("get_count_combinations is correct for count of zero", {
populations = c("A", "B")
count = 0
result = read.table(header=TRUE, text="
A B
0 0",
colClasses=c("integer", "integer"))
expect_equal(get_count_combinations(populations, count), result)
})
test_that("get_count_combinations is correct for more cohorts", {
populations = c("A", "B", "C")
count = 4
result = read.table(header=TRUE, text="
A B C
0 0 4
0 1 3
0 2 2
0 3 1
0 4 0
1 0 3
1 1 2
1 2 1
1 2 2
1 3 0
2 0 2
2 1 1
2 1 2
2 2 0
2 2 1
2 2 2 # this is the extra row that captures the remaining possibilities
3 0 1
3 1 0
4 0 0",
colClasses=c("integer", "integer"))
# get the combos out, and make sure it is sorted correctly
combos = get_count_combinations(populations, count)
combos = combos[ do.call(order, as.list(combos)), ]
row.names(combos) = 1:nrow(combos)
expect_equal(combos, result)
})
test_that("get_count_combinations is correct for NA cohorts", {
#
expect_equal(get_count_combinations(populations, NA), NA)
})
context("Enrichment tests return the correct P-values")
test_that("enrichment_multiple_populations is correct", {
freqs = list("AFR"=list("lof"=0.01, "functional"=0.1, 'synonymous'=0.1),
"EAS"=list("lof"=0.02, "functional"=0.1, 'synonymous'=0.1))
cohort_n = list("AFR"=50, "EAS"=150)
autozygous_rate = 0.005
counts = list("biallelic_lof"=4, 'biallelic_func'=6, 'lof_func'=3, 'biallelic_syn'=1)
# define the values that would be obtained for all the individual tests.
# We get NA values for the cumulative loF and functional frequencies, since
# this test operates on multiple populations, so we can't get a single value
# for the frequencies.
result = list(lof=NA, functional=NA, synonymous=NA, biallelic_lof_p=1.721856125e-06,
lof_func_p=1.4797290028e-05, biallelic_func_p=0.019410324318,
biallelic_syn_p=0.877665591, all_p=7.68708279005614e-06)
expect_equal(enrichment_multiple_populations(freqs,
counts, cohort_n, autozygous_rate), result)
})
test_that("sum_combo_tests is correct", {
freqs = list("A"=list("lof"=0.1, "functional"=0.1, 'synonymous'=0.3),
"B"=list("lof"=0.1, "functional"=0.1, 'synonymous'=0.3))
cohort_n = list("A"=100, "B"=100)
combos = get_count_combinations(names(cohort_n), count=1)
expect_equal(sum_combo_tests(freqs, cohort_n, combos,
biallelic_lof_enrichment), 0.866020325142038)
# and test for a larger number of families
combos = get_count_combinations(names(cohort_n), count=7)
expect_equal(sum_combo_tests(freqs, cohort_n, combos,
biallelic_lof_enrichment), 0.00429554232885157)
# check that summing across populations where the frequencies are the same
# gives the same answer as for the single population p-value, even if the
# population sizes differ between ethnicities.
cohort_n = list("A"=20, "B"=180)
expect_equal(sum_combo_tests(freqs, cohort_n, combos, biallelic_lof_enrichment),
pbinom(6, sum(unlist(cohort_n)), freqs[[1]][["lof"]]**2, lower.tail=FALSE))
# if the combos object is NA, then it returns NA
expect_equal(sum_combo_tests(freqs, cohort_n, NA, biallelic_lof_enrichment), NA)
})
test_that("single enrichment tests are correct", {
freq = list("lof"=0.1, "functional"=0.2, 'synonymous'=0.3)
cohort_n = 100
count = 2
# make sure the calculation uses the binomial model we expect
expect_equal(biallelic_lof_enrichment(freq, count, cohort_n),
pbinom(1, 100, 0.01, lower.tail=FALSE))
# make sure the calculation gives us the freqst value we expect
expect_equal(biallelic_lof_enrichment(freq, count, cohort_n),
0.264238021077044)
# a count of zero should have a p-value of 1
expect_equal(biallelic_lof_enrichment(freq, 0, cohort_n), 1)
# check the lof/func enrichment calculation
expect_equal(lof_func_enrichment(freq, count, cohort_n),
0.947531944275668)
# check the functional enrichment calculation
expect_equal(biallelic_func_enrichment(freq, count, cohort_n),
0.91283668331261)
# check that NA counts return NA values
expect_identical(biallelic_func_enrichment(freq, NA, cohort_n),
as.numeric(NA))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.