Nothing
#checkes various statistics of example 1
context("examples")
test_that("checked_example1", {
sink(file = "test_example1.log")
hh <-
data2haplohh(
hap_file = "example1.hap",
map_file = "example1.map",
allele_coding = "map"
)
res <- calc_ehh(
hh,
mrk = "rs6",
limehh = 0,
discard_integration_at_border = FALSE,
include_nhaplo = TRUE
)
expect_equivalent(res$freq, c(0.5, 0.5))
expected_ehh_ancestral <-
c(0, 0, 0, 1 / 6, 1 / 2, 1, 1 / 3, 1 / 6, 0, 0, 0)
expected_ehh_derived <- c(1 / 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 / 3)
expect_identical(res$ehh$EHH_A, expected_ehh_ancestral)
expect_identical(res$ehh$EHH_D, expected_ehh_derived)
expect_identical(res$ehh$NHAPLO_A, c(0L, 0L, rep(4L, 7), 0L, 0L))
expect_identical(res$ehh$NHAPLO_D, rep(4L, 11))
expected_ihh_A <-
sum(expected_ehh_ancestral[2:10] * 10000) + sum(expected_ehh_ancestral[c(1, 11)] *
5000)
expected_ihh_D <-
sum(expected_ehh_derived[2:10] * 10000) + sum(expected_ehh_derived[c(1, 11)] *
5000)
expect_equivalent(res$ihh, cbind(expected_ihh_A, expected_ihh_D))
scan <- scan_hh(hh, discard_integration_at_border = FALSE)
expected <- data.frame(
CHR = rep("chr1", 11),
POSITION = c(
10000,
20000,
30000,
40000,
50000,
60000,
70000,
80000,
90000,
1e+05,
110000
),
FREQ_A = c(
0.875,
0.875,
0.75,
0.75,
0.875,
0.5,
0.75,
0.875,
0.75,
0.875,
0.5
)
,
FREQ_D = c(
0.125,
0.125,
0.25,
0.25,
0.125,
0.5,
0.25,
0.125,
0.25,
0.125,
0.5
)
,
NHAPLO_A = c(7L, 7L, 6L, 6L, 7L, 4L, 6L, 7L, 6L, 7L, 4L),
NHAPLO_D = c(1L, 1L, 2L, 2L, 1L, 4L, 2L, 1L, 2L, 1L, 4L),
IHH_A = c(
21992.2619047619,
36190.4761904762,
45000,
47666.6666666667,
33333.3333333333,
18816.6666666667,
45333.3333333333,
38571.4285714286,
50666.6666666667,
35000,
25075
),
IHH_D = c(
0,
0,
23512.5,
28025,
0,
89166.6666666667,
28025,
0,
23512.5,
0,
25833.3333333333
),
IES = c(
15295.2380952381,
25892.8571428571,
22678.5714285714,
24285.7142857143,
23750,
19821.4285714286,
23035.7142857143,
27678.5714285714,
25714.2857142857,
25000,
8032.14285714286
),
INES = c(
21992.2619047619,
36190.4761904762,
43437.5,
46250,
33333.3333333333,
52916.6666666667,
44062.5,
38571.4285714286,
48750,
35000,
25416.6666666667
)
)
row.names(expected) <- row.names <- c("rs1",
"rs2",
"rs3",
"rs4",
"rs5",
"rs6",
"rs7",
"rs8",
"rs9",
"rs10",
"rs11")
expect_equal(scan, expected)
# test extract_regions
regions <- data.frame(
CHR = c("chr1", "chr2", "chr1", "chr1"),
START = c(100000, 10000, 50000, 55000),
END = c(110000, 120000, 61000, 70000)
)
scan_regions <- extract_regions(scan, regions)
expect_identical(scan_regions, scan[c(6, 7, 11), ])
# check integration over stepwise constant rehh curve
ehh <- calc_ehh(
hh,
mrk = "rs6",
discard_integration_at_border = FALSE,
phased = FALSE,
limehh = 0.05,
interpolate = FALSE
)
expected_ihh_A <- 25000 - 0.05 * 30000
expected_ihh_D <- 100000 - 0.05 * 100000
expect_equivalent(ehh$ihh, cbind(expected_ihh_A, expected_ihh_D))
ehh <- calc_ehh(
hh,
mrk = "rs6",
discard_integration_at_border = FALSE,
phased = FALSE,
limehh = 0,
interpolate = FALSE
)
expected_ihh_A <- 25000
expected_ihh_D <- 100000
expect_equivalent(ehh$ihh, cbind(expected_ihh_A, expected_ihh_D))
# check that min_maf excludes mrks
hh <- data2haplohh(
hap_file = "example1.hap",
map_file = "example1.map",
allele_coding = "map",
min_maf = 0.2
)
res <- calc_ehh(
hh,
mrk = "rs6",
limehh = 0,
discard_integration_at_border = FALSE,
include_nhaplo = TRUE
)
expect_equivalent(res$freq, c(0.5, 0.5))
expected_ehh_ancestral <- c(0, 1 / 3, 1, 1 / 3, 0, 0)
expected_ehh_derived <- c(1, 1, 1, 1, 1, 1 / 3)
expect_identical(res$ehh$EHH_A, expected_ehh_ancestral)
expect_identical(res$ehh$EHH_D, expected_ehh_derived)
expect_identical(res$ehh$NHAPLO_A, c(rep(4L, 5), 0L))
expect_identical(res$ehh$NHAPLO_D, rep(4L, 6))
expected_ihh_A <-
(1 / 3 * 10000 + (1 / 3 + 1) * 20000 + (1 + 1 / 3) * 10000 +
1 / 3 * 20000) / 2
expected_ihh_D <-
((1 + 1) * 10000 + (1 + 1) * 20000 + (1 + 1) * 30000 + (1 + 1 /
3) * 20000) / 2
expect_equivalent(res$ihh, cbind(expected_ihh_A, expected_ihh_D))
# check that scalegap reduces gaps
ehh <- calc_ehh(
hh,
mrk = "rs6",
limehh = 0,
scalegap = 15000,
discard_integration_at_border = FALSE,
include_nhaplo = TRUE
)
expected_ihh_A <-
(1 / 3 * 10000 + (1 / 3 + 1) * 15000 + (1 + 1 / 3) * 10000 +
1 / 3 * 15000) / 2
expected_ihh_D <-
((1 + 1) * 10000 + (1 + 1) * 15000 + (1 + 1) * 25000 + (1 + 1 /
3) * 15000) / 2
expect_equivalent(ehh$ihh, cbind(expected_ihh_A, expected_ihh_D))
# check that maxgap cuts integration
ehh <- calc_ehh(
hh,
mrk = "rs6",
limehh = 0,
discard_integration_at_border = FALSE,
maxgap = 15000
)
expected_ihh_A <- ((1 + 1 / 3) * 10000) / 2
expected_ihh_D <- ((1 + 1) * 10000) / 2
expect_equivalent(ehh$ihh, cbind(expected_ihh_A, expected_ihh_D))
sink()
file.remove("test_example1.log")
})
test_that("checked_example2", {
sink(file = "test_example2.log")
hh <- data2haplohh(
hap_file = "example2.hap",
map_file = "example2.map",
allele_coding = "01",
min_perc_geno.hap = 0,
min_perc_geno.mrk = 1
)
res <- calc_ehh(
hh,
mrk = "rs6",
limehh = 0,
discard_integration_at_border = FALSE,
include_nhaplo = TRUE
)
expect_equivalent(res$freq, c(4 / 11, 3 / 11, 4 / 11))
expected_ehh_ancestral <-
c(0, 0, 0, 1 / 6, 1 / 2, 1, 1 / 3, 1 / 6, 0, 0, 0)
expected_ehh_derived1 <- c(rep(1, 10), 0)
expected_ehh_derived2 <-
c(0, 0, 1 / 3, 1 / 3, 1, 1, 1, 1, 0, 0, 0)
expect_identical(res$ehh$EHH_A, expected_ehh_ancestral)
expect_identical(res$ehh$EHH_D1, expected_ehh_derived1)
expect_identical(res$ehh$EHH_D2, expected_ehh_derived2)
expect_identical(res$ehh$NHAPLO_A, c(0L, 0L, 2L, rep(4L, 6), 0L, 0L))
expect_identical(res$ehh$NHAPLO_D1, c(rep(3L, 9), 2L, 2L))
expect_identical(res$ehh$NHAPLO_D2, c(0L, 2L, rep(3L, 3), rep(4L, 3), 3L, 0L, 0L))
expected_ihh_A <-
sum(expected_ehh_ancestral[2:10] * 10000) + sum(expected_ehh_ancestral[c(1, 11)] *
5000)
expected_ihh_D1 <-
sum(expected_ehh_derived1[2:10] * 10000) + sum(expected_ehh_derived1[c(1, 11)] *
5000)
expected_ihh_D2 <-
sum(expected_ehh_derived2[2:10] * 10000) + sum(expected_ehh_derived2[c(1, 11)] *
5000)
expect_equivalent(res$ihh,
cbind(expected_ihh_A, expected_ihh_D1, expected_ihh_D2))
## check scan
scan <- scan_hh(hh, discard_integration_at_border = FALSE)
expected <- data.frame(
CHR = rep("chr1", 11),
POSITION = c(
10000,
20000,
30000,
40000,
50000,
60000,
70000,
80000,
90000,
1e+05,
110000
),
FREQ_A = c(
0.916666666666667,
0.909090909090909,
0.9,
0.75,
0.909090909090909,
0.363636363636364,
0.833333333333333,
0.916666666666667,
0.7,
0.777777777777778,
0.666666666666667
)
,
FREQ_D = c(
0.0833333333333333,
0.0909090909090909,
0.1,
0.25,
0.0909090909090909,
0.363636363636364,
0.166666666666667,
0.0833333333333333,
0.2,
0.222222222222222,
0.333333333333333
),
NHAPLO_A = c(11L, 10L, 9L, 9L, 10L, 4L, 10L, 11L, 7L, 7L, 8L),
NHAPLO_D = c(1L, 1L, 1L, 3L, 1L, 4L, 2L, 1L, 2L, 2L, 4L)
,
IHH_A = c(
25045.2380952381,
32750.9920634921,
39881.9444444445,
38453.373015873,
29680.1587301587,
18816.6666666667,
28960.5158730159,
27370.1298701299,
40142.8571428571,
24452.380952381,
14291.6666666667
),
IHH_D = c(
0,
0,
0,
21383.3333333333,
0,
43216.6666666667,
28025,
0,
33012.5,
9025,
13037.5
),
IES = c(
19362.1212121212,
25023.5930735931,
30057.1428571429,
23654.6717171717,
22727.2005772006,
9582.1608946609,
17869.696969697,
21479.797979798,
15730.1587301587,
11326.9841269841,
5890.83694083694
),
INES = c(
25045.2380952381,
32750.9920634921,
39881.9444444445,
38145.5069124424,
29680.1587301587,
49005.9523809524,
28743.6766567576,
27370.1298701299,
39857.9545454545,
23125,
14158.2264957265
)
)
row.names(expected) <- row.names <- c("rs1",
"rs2",
"rs3",
"rs4",
"rs5",
"rs6",
"rs7",
"rs8",
"rs9",
"rs10",
"rs11")
expect_equal(scan, expected)
sink()
file.remove("test_example2.log")
})
test_that("checked_unpolarized", {
sink(file = "test_unpolarized.log")
hh <- data2haplohh(hap_file = "example1.hap",
map_file = "example1.map",
allele_coding = "map")
ihh_1 <- scan_hh(hh,
discard_integration_at_border = FALSE,
polarized = TRUE)
ihh_2 <- scan_hh(hh,
discard_integration_at_border = FALSE,
polarized = FALSE)
ihs1 <- ihh2ihs(ihh_1, freqbin = 1)
ihs2 <- ihh2ihs(ihh_2, freqbin = 1)
expect_identical(ihs1, ihs2)
# test that allele coding is irrelevant for unpolarized data,
# (as long as the order is not changed, since for equal frequency,
# the "major" allele is the one with lower encoding
hh@haplo[hh@haplo == 0] <- 4L # arbitrary integer number
hh@haplo[hh@haplo == 1] <- 7L # arbitrary, bigger integer number
ihh_3 <- scan_hh(hh,
discard_integration_at_border = FALSE,
polarized = FALSE)
expect_identical(ihh_2, ihh_3)
sink()
file.remove("test_unpolarized.log")
})
test_that("checked_limhaplo", {
sink(file = "test_limhaplo.log")
hh <- data2haplohh(
hap_file = "example2.hap",
map_file = "example2.map",
allele_coding = "map",
min_perc_geno.mrk = 1
)
expected_ihh <-
c(IHH_A = 18816.6666666667,
IHH_D1 = 80512.5,
IHH_D2 = 43216.6666666667)
ehh <-
calc_ehh(
hh,
mrk = "rs6",
limhaplo = 3,
discard_integration_at_border = FALSE
)
expect_equal(ehh$ihh, expected_ihh)
expected_ihh <-
c(IHH_A = 18816.6666666667,
IHH_D1 = 0,
IHH_D2 = 28025)
ehh <-
calc_ehh(
hh,
mrk = "rs6",
limhaplo = 4,
discard_integration_at_border = FALSE
)
expect_equal(ehh$ihh, expected_ihh)
sink()
file.remove("test_limhaplo.log")
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.