tests/testthat/test_genomecov.R

x <- tibble::tribble(
  ~chrom, ~start, ~end, ~strand,
  "chr1", 20, 70, "+",
  "chr1", 50, 100, "-",
  "chr1", 200, 250, "+",
  "chr1", 220, 250, "+"
)

genome <- tibble::tribble(
  ~chrom, ~size,
  "chr1", 500,
  "chr2", 1000
)
test_that("bed_genomecov works", {
  ex <- tibble::tribble(
    ~chrom, ~start, ~end, ~.depth,
    "chr1", 20L, 50L, 1L,
    "chr1", 50L, 70L, 2L,
    "chr1", 70L, 100L, 1L,
    "chr1", 200L, 220L, 1L,
    "chr1", 220L, 250L, 2L
  )
  obs <- bed_genomecov(x, genome)
  expect_identical(obs, ex)

  res <- bed_genomecov(x, genome, zero_depth = TRUE)
  expect_true(sum(res$.depth == 0) > 0)
})

test_that("groups are respected", {
  ex <- tibble::tribble(
    ~chrom, ~start, ~end, ~strand, ~.depth,
    "chr1", 20L, 70L, "+", 1L,
    "chr1", 200L, 220L, "+", 1L,
    "chr1", 220L, 250L, "+", 2L,
    "chr1", 50L, 100L, "-", 1L
  )
  obs <- bed_genomecov(group_by(x, strand), genome)
  expect_identical(obs, ex)

  res <- bed_genomecov(group_by(x, strand), genome, zero_depth = TRUE)
  expect_equal(sum(res$chrom == "chr2"), 2L)
})

test_that("grouping is retained for zero depth intervals", {
  xx <- tibble::tribble(
    ~chrom, ~start, ~end, ~strand, ~grp,
    "chr1", 20L, 70L, "+", 1,
    "chr1", 200L, 220L, "-", 1,
    "chr1", 20L, 70L, "+", 2,
    "chr1", 200L, 220L, "-", 2
  ) |>
    group_by(strand, grp)

  many_chroms_genome <- tibble(
    chrom = c("chr1", LETTERS),
    size  = 500
  )

  res <- bed_genomecov(xx, many_chroms_genome, zero_depth = TRUE)
  expect_equal(length(setdiff(many_chroms_genome$chrom, res$chrom)), 0L)

  ex <- tibble::tribble(
    ~strand, ~grp, ~n,
    "+", 1, 26L,
    "+", 2, 26L,
    "-", 1, 26L,
    "-", 2, 26L
  )

  lr <- res[res$chrom %in% LETTERS, ]
  nlr <- lr |>
    group_by(strand, grp) |>
    summarize(n = n()) |>
    ungroup()
  expect_identical(nlr, ex)
})

test_that("chroms in bed, not in genome, are dropped", {
  xx <- tibble::tribble(
    ~chrom, ~start, ~end, ~strand, ~.depth,
    "hello", 20L, 70L, "+", 1L,
    "world", 200L, 220L, "+", 1L,
    "chr1", 220L, 250L, "+", 2L
  )
  expect_warning(res <- bed_genomecov(xx, genome))
  expect_true(all(res$chrom == "chr1"))
})

test_that("zero length input is handled", {
  xx <- tibble::tribble(
    ~chrom, ~start, ~end, ~strand, ~.depth,
    "hello", 20L, 70L, "+", 1L,
  )

  expect_warning(res <- bed_genomecov(xx, genome))
  expect_true(nrow(res) == 0)

  xx <- xx[xx$chrom != "hello", ]

  res <- bed_genomecov(xx, genome)
  expect_true(nrow(res) == 0)
})

test_that("check edge cases with 1 bp intervals", {
  # base-level coverage equals number of basepairs in input intervals
  genome <- tribble(
    ~chrom, ~size,
    "chr1", 1e5
  )
  seed <- 1010486
  ivls <- bed_random(genome, length = 1, n = 1e3, seed = seed)

  res <- bed_genomecov(ivls, genome)
  expect_true(sum(res$.depth) == 1e3)

  ivls <- tibble(
    chrom = "chr1",
    start = seq(0, 999),
    end = start + 2
  )
  res <- bed_genomecov(ivls, genome)
  expect_true(sum(res$.depth) == (1e3 * 2))

  set.seed(seed)
  ivls <- tibble(
    chrom = "chr1",
    start = seq(0, 999),
    end = start + sample(1:100, length(start), replace = TRUE)
  )

  res <- bed_genomecov(ivls, genome)
  n_bp <- sum(ivls$end - ivls$start)
  n_cov <- sum(res$.depth * (res$end - res$start))
  expect_equal(n_bp, n_cov)
})

test_that("check edge cases at beginning and end", {
  genome <- tribble(
    ~chrom, ~size,
    "chr1", 1000
  )
  ex <- tibble::tribble(
    ~chrom, ~start, ~end, ~.depth,
    "chr1", 0L, 1L, 3L,
    "chr1", 1L, 2L, 1L,
    "chr1", 999L, 1000L, 1L
  )

  # oob intervals are ignored with a warning
  ivls <- tibble(
    chrom = "chr1",
    start = c(rep(0, 3), 1, 999, 1000),
    end = start + 1
  )

  expect_warning(res <- bed_genomecov(ivls, genome))
  expect_true(all(res$start < 1000))

  expect_identical(res, ex)
})

# bed related tests from #https://github.com/arq5x/bedtools2/blob/master/test/genomecov/test-genomecov.sh

y <- tibble::tribble(
  ~chrom, ~start, ~end, ~group, ~score, ~strand,
  "1", 15L, 20L, "y1", 1L, "+",
  "1", 17L, 22L, "y2", 2L, "+"
)

genome <- tibble::tribble(
  ~chrom, ~size,
  "1", 100L,
  "2", 100L,
  "3", 100L
)

test_that("Test with chroms that have no coverage", {
  ex <- tibble::tribble(
    ~chrom, ~start, ~end, ~.depth,
    "1", 15L, 17L, 1L,
    "1", 17L, 20L, 2L,
    "1", 20L, 22L, 1L
  )
  obs <- bed_genomecov(y, genome)
  expect_equal(ex, obs)
})

test_that("Test with chroms that have no coverage", {
  ex <- tibble::tribble(
    ~chrom, ~start, ~end, ~.depth,
    "1", 0L, 15L, 0L,
    "1", 15L, 17L, 1L,
    "1", 17L, 20L, 2L,
    "1", 20L, 22L, 1L,
    "1", 22L, 100L, 0L,
    "2", 0L, 100L, 0L,
    "3", 0L, 100L, 0L
  )

  obs <- bed_genomecov(y, genome, zero_depth = TRUE)
  expect_equal(ex, obs)
})
jayhesselberth/valr documentation built on April 24, 2024, 7:15 a.m.