test_examples.R
In rehh: Searching for Footprints of Selection using 'Extended Haplotype Homozygosity' Based Tests

#checkes various statistics of example 1
context("examples")

test_that("checked_example1", {
  sink(file = "test_example1.log")
  hh <-
    data2haplohh(
      hap_file = "example1.hap",
      map_file = "example1.map",
      allele_coding = "map"
    )
  
  res <- calc_ehh(
    hh,
    mrk = "rs6",
    limehh = 0,
    discard_integration_at_border = FALSE,
    include_nhaplo = TRUE
  )
  expect_equivalent(res$freq, c(0.5, 0.5))
  expected_ehh_ancestral <-
    c(0, 0, 0, 1 / 6, 1 / 2, 1, 1 / 3, 1 / 6, 0, 0, 0)
  expected_ehh_derived <- c(1 / 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 / 3)
  expect_identical(res$ehh$EHH_A, expected_ehh_ancestral)
  expect_identical(res$ehh$EHH_D, expected_ehh_derived)
  expect_identical(res$ehh$NHAPLO_A, c(0L, 0L, rep(4L, 7), 0L, 0L))
  expect_identical(res$ehh$NHAPLO_D, rep(4L, 11))
  expected_ihh_A <-
    sum(expected_ehh_ancestral[2:10] * 10000) + sum(expected_ehh_ancestral[c(1, 11)] *
                                                      5000)
  expected_ihh_D <-
    sum(expected_ehh_derived[2:10] * 10000) + sum(expected_ehh_derived[c(1, 11)] *
                                                    5000)
  expect_equivalent(res$ihh, cbind(expected_ihh_A, expected_ihh_D))
  scan <- scan_hh(hh, discard_integration_at_border = FALSE)
  expected <- data.frame(
    CHR = rep("chr1", 11),
    POSITION = c(
      10000,
      20000,
      30000,
      40000,
      50000,
      60000,
      70000,
      80000,
      90000,
      1e+05,
      110000
    ),
    FREQ_A = c(
      0.875,
      0.875,
      0.75,
      0.75,
      0.875,
      0.5,
      0.75,
      0.875,
      0.75,
      0.875,
      0.5
    )
    ,
    FREQ_D = c(
      0.125,
      0.125,
      0.25,
      0.25,
      0.125,
      0.5,
      0.25,
      0.125,
      0.25,
      0.125,
      0.5
    )
    ,
    NHAPLO_A = c(7L, 7L, 6L, 6L, 7L, 4L, 6L, 7L, 6L, 7L, 4L),
    NHAPLO_D = c(1L, 1L, 2L, 2L, 1L, 4L, 2L, 1L, 2L, 1L, 4L),
    IHH_A = c(
      21992.2619047619,
      36190.4761904762,
      45000,
      47666.6666666667,
      33333.3333333333,
      18816.6666666667,
      45333.3333333333,
      38571.4285714286,
      50666.6666666667,
      35000,
      25075
    ),
    IHH_D = c(
      0,
      0,
      23512.5,
      28025,
      0,
      89166.6666666667,
      28025,
      0,
      23512.5,
      0,
      25833.3333333333
    ),
    IES = c(
      15295.2380952381,
      25892.8571428571,
      22678.5714285714,
      24285.7142857143,
      23750,
      19821.4285714286,
      23035.7142857143,
      27678.5714285714,
      25714.2857142857,
      25000,
      8032.14285714286
    ),
    INES = c(
      21992.2619047619,
      36190.4761904762,
      43437.5,
      46250,
      33333.3333333333,
      52916.6666666667,
      44062.5,
      38571.4285714286,
      48750,
      35000,
      25416.6666666667
    )
  )
  row.names(expected) <- row.names <- c("rs1",
                                        "rs2",
                                        "rs3",
                                        "rs4",
                                        "rs5",
                                        "rs6",
                                        "rs7",
                                        "rs8",
                                        "rs9",
                                        "rs10",
                                        "rs11")
  expect_equal(scan, expected)
  
  # test extract_regions
  regions <- data.frame(
    CHR = c("chr1", "chr2", "chr1", "chr1"),
    START = c(100000, 10000, 50000, 55000),
    END = c(110000, 120000, 61000, 70000)
  )
  scan_regions <- extract_regions(scan, regions)
  
  expect_identical(scan_regions, scan[c(6, 7, 11), ])
  
  # check integration over stepwise constant rehh curve
  ehh <- calc_ehh(
    hh,
    mrk = "rs6",
    discard_integration_at_border = FALSE,
    phased = FALSE,
    limehh = 0.05,
    interpolate = FALSE
  )
  expected_ihh_A <- 25000 - 0.05 * 30000
  expected_ihh_D <- 100000 - 0.05 * 100000
  expect_equivalent(ehh$ihh, cbind(expected_ihh_A, expected_ihh_D))
  
  ehh <- calc_ehh(
    hh,
    mrk = "rs6",
    discard_integration_at_border = FALSE,
    phased = FALSE,
    limehh = 0,
    interpolate = FALSE
  )
  expected_ihh_A <- 25000
  expected_ihh_D <- 100000
  expect_equivalent(ehh$ihh, cbind(expected_ihh_A, expected_ihh_D))
  
  # check that min_maf excludes mrks
  hh <- data2haplohh(
    hap_file = "example1.hap",
    map_file = "example1.map",
    allele_coding = "map",
    min_maf = 0.2
  )
  
  res <- calc_ehh(
    hh,
    mrk = "rs6",
    limehh = 0,
    discard_integration_at_border = FALSE,
    include_nhaplo = TRUE
  )
  expect_equivalent(res$freq, c(0.5, 0.5))
  expected_ehh_ancestral <- c(0, 1 / 3, 1, 1 / 3, 0, 0)
  expected_ehh_derived <- c(1, 1, 1, 1, 1, 1 / 3)
  expect_identical(res$ehh$EHH_A, expected_ehh_ancestral)
  expect_identical(res$ehh$EHH_D, expected_ehh_derived)
  expect_identical(res$ehh$NHAPLO_A, c(rep(4L, 5), 0L))
  expect_identical(res$ehh$NHAPLO_D, rep(4L, 6))
  
  expected_ihh_A <-
    (1 / 3 * 10000 + (1 / 3 + 1) * 20000 + (1 + 1 / 3) * 10000 +
       1 / 3 * 20000) / 2
  expected_ihh_D <-
    ((1 + 1) * 10000 + (1 + 1) * 20000 + (1 + 1) * 30000 + (1 + 1 /
                                                              3) * 20000) / 2
  expect_equivalent(res$ihh, cbind(expected_ihh_A, expected_ihh_D))
  
  # check that scalegap reduces gaps
  ehh <- calc_ehh(
    hh,
    mrk = "rs6",
    limehh = 0,
    scalegap = 15000,
    discard_integration_at_border = FALSE,
    include_nhaplo = TRUE
  )
  expected_ihh_A <-
    (1 / 3 * 10000 + (1 / 3 + 1) * 15000 + (1 + 1 / 3) * 10000 +
       1 / 3 * 15000) / 2
  expected_ihh_D <-
    ((1 + 1) * 10000 + (1 + 1) * 15000 + (1 + 1) * 25000 + (1 + 1 /
                                                              3) * 15000) / 2
  expect_equivalent(ehh$ihh, cbind(expected_ihh_A, expected_ihh_D))
  
  # check that maxgap cuts integration
  ehh <- calc_ehh(
    hh,
    mrk = "rs6",
    limehh = 0,
    discard_integration_at_border = FALSE,
    maxgap = 15000
  )
  expected_ihh_A <- ((1 + 1 / 3) * 10000) / 2
  expected_ihh_D <- ((1 + 1) * 10000) / 2
  expect_equivalent(ehh$ihh, cbind(expected_ihh_A, expected_ihh_D))
  
  sink()
  file.remove("test_example1.log")
})


test_that("checked_example2", {
  sink(file = "test_example2.log")
  hh <- data2haplohh(
    hap_file = "example2.hap",
    map_file = "example2.map",
    allele_coding = "01",
    min_perc_geno.hap = 0,
    min_perc_geno.mrk = 1
  )
  
  res <- calc_ehh(
    hh,
    mrk = "rs6",
    limehh = 0,
    discard_integration_at_border = FALSE,
    include_nhaplo = TRUE
  )
  expect_equivalent(res$freq, c(4 / 11, 3 / 11, 4 / 11))
  expected_ehh_ancestral <-
    c(0, 0, 0, 1 / 6, 1 / 2, 1, 1 / 3, 1 / 6, 0, 0, 0)
  expected_ehh_derived1 <- c(rep(1, 10), 0)
  expected_ehh_derived2 <-
    c(0, 0, 1 / 3, 1 / 3, 1, 1, 1, 1, 0, 0, 0)
  expect_identical(res$ehh$EHH_A, expected_ehh_ancestral)
  expect_identical(res$ehh$EHH_D1, expected_ehh_derived1)
  expect_identical(res$ehh$EHH_D2, expected_ehh_derived2)
  expect_identical(res$ehh$NHAPLO_A, c(0L, 0L, 2L, rep(4L, 6), 0L, 0L))
  expect_identical(res$ehh$NHAPLO_D1, c(rep(3L, 9), 2L, 2L))
  expect_identical(res$ehh$NHAPLO_D2, c(0L, 2L, rep(3L, 3), rep(4L, 3), 3L, 0L, 0L))
  expected_ihh_A <-
    sum(expected_ehh_ancestral[2:10] * 10000) + sum(expected_ehh_ancestral[c(1, 11)] *
                                                      5000)
  expected_ihh_D1 <-
    sum(expected_ehh_derived1[2:10] * 10000) + sum(expected_ehh_derived1[c(1, 11)] *
                                                     5000)
  expected_ihh_D2 <-
    sum(expected_ehh_derived2[2:10] * 10000) + sum(expected_ehh_derived2[c(1, 11)] *
                                                     5000)
  expect_equivalent(res$ihh,
                    cbind(expected_ihh_A, expected_ihh_D1, expected_ihh_D2))
  
  ## check scan
  scan <- scan_hh(hh, discard_integration_at_border = FALSE)
  expected <- data.frame(
    CHR = rep("chr1", 11),
    POSITION = c(
      10000,
      20000,
      30000,
      40000,
      50000,
      60000,
      70000,
      80000,
      90000,
      1e+05,
      110000
    ),
    FREQ_A = c(
      0.916666666666667,
      0.909090909090909,
      0.9,
      0.75,
      0.909090909090909,
      0.363636363636364,
      0.833333333333333,
      0.916666666666667,
      0.7,
      0.777777777777778,
      0.666666666666667
    )
    ,
    FREQ_D = c(
      0.0833333333333333,
      0.0909090909090909,
      0.1,
      0.25,
      0.0909090909090909,
      0.363636363636364,
      0.166666666666667,
      0.0833333333333333,
      0.2,
      0.222222222222222,
      0.333333333333333
    ),
    NHAPLO_A = c(11L, 10L, 9L, 9L, 10L, 4L, 10L, 11L, 7L, 7L, 8L),
    NHAPLO_D = c(1L, 1L, 1L, 3L, 1L, 4L, 2L, 1L, 2L, 2L, 4L)
    ,
    IHH_A = c(
      25045.2380952381,
      32750.9920634921,
      39881.9444444445,
      38453.373015873,
      29680.1587301587,
      18816.6666666667,
      28960.5158730159,
      27370.1298701299,
      40142.8571428571,
      24452.380952381,
      14291.6666666667
    ),
    IHH_D = c(
      0,
      0,
      0,
      21383.3333333333,
      0,
      43216.6666666667,
      28025,
      0,
      33012.5,
      9025,
      13037.5
    ),
    IES = c(
      19362.1212121212,
      25023.5930735931,
      30057.1428571429,
      23654.6717171717,
      22727.2005772006,
      9582.1608946609,
      17869.696969697,
      21479.797979798,
      15730.1587301587,
      11326.9841269841,
      5890.83694083694
    ),
    INES = c(
      25045.2380952381,
      32750.9920634921,
      39881.9444444445,
      38145.5069124424,
      29680.1587301587,
      49005.9523809524,
      28743.6766567576,
      27370.1298701299,
      39857.9545454545,
      23125,
      14158.2264957265
    )
  )
  row.names(expected) <- row.names <- c("rs1",
                                        "rs2",
                                        "rs3",
                                        "rs4",
                                        "rs5",
                                        "rs6",
                                        "rs7",
                                        "rs8",
                                        "rs9",
                                        "rs10",
                                        "rs11")
  expect_equal(scan, expected)
  
  sink()
  file.remove("test_example2.log")
})

test_that("checked_unpolarized", {
  sink(file = "test_unpolarized.log")
  hh <- data2haplohh(hap_file = "example1.hap",
                     map_file = "example1.map", 
                     allele_coding = "map")
  
  ihh_1 <- scan_hh(hh,
                   discard_integration_at_border = FALSE,
                   polarized = TRUE)
  ihh_2 <- scan_hh(hh,
                   discard_integration_at_border = FALSE,
                   polarized = FALSE)
  
  ihs1 <- ihh2ihs(ihh_1, freqbin = 1)
  ihs2 <- ihh2ihs(ihh_2, freqbin = 1)
  
  expect_identical(ihs1, ihs2)
  
  # test that allele coding is irrelevant for unpolarized data,
  # (as long as the order is not changed, since for equal frequency,
  # the "major" allele is the one with lower encoding
  hh@haplo[hh@haplo == 0] <- 4L # arbitrary integer number
  hh@haplo[hh@haplo == 1] <- 7L # arbitrary, bigger integer number
  
  ihh_3 <- scan_hh(hh,
                   discard_integration_at_border = FALSE,
                   polarized = FALSE)
  
  expect_identical(ihh_2, ihh_3)
  sink()
  file.remove("test_unpolarized.log")
})

test_that("checked_limhaplo", {
  sink(file = "test_limhaplo.log")
  hh <- data2haplohh(
    hap_file = "example2.hap",
    map_file = "example2.map",
    allele_coding = "map",
    min_perc_geno.mrk = 1
  )
  
  expected_ihh <-
    c(IHH_A = 18816.6666666667,
      IHH_D1 = 80512.5,
      IHH_D2 = 43216.6666666667)
  ehh <-
    calc_ehh(
      hh,
      mrk = "rs6",
      limhaplo = 3,
      discard_integration_at_border = FALSE
    )
  expect_equal(ehh$ihh, expected_ihh)
  
  expected_ihh <-
    c(IHH_A = 18816.6666666667,
      IHH_D1 = 0,
      IHH_D2 = 28025)
  ehh <-
    calc_ehh(
      hh,
      mrk = "rs6",
      limhaplo = 4,
      discard_integration_at_border = FALSE
    )
  expect_equal(ehh$ihh, expected_ihh)
  
  sink()
  file.remove("test_limhaplo.log")
})

Any scripts or data that you put into this service are public.

rehh documentation built on Sept. 15, 2021, 5:06 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

rehh
Searching for Footprints of Selection using 'Extended Haplotype Homozygosity' Based Tests

tests/testthat/test_examples.R
In rehh: Searching for Footprints of Selection using 'Extended Haplotype Homozygosity' Based Tests

Try the rehh package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rehh Searching for Footprints of Selection using 'Extended Haplotype Homozygosity' Based Tests

tests/testthat/test_examples.R In rehh: Searching for Footprints of Selection using 'Extended Haplotype Homozygosity' Based Tests

Try the rehh package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

rehh
Searching for Footprints of Selection using 'Extended Haplotype Homozygosity' Based Tests

tests/testthat/test_examples.R
In rehh: Searching for Footprints of Selection using 'Extended Haplotype Homozygosity' Based Tests