tests/testthat/test-op-string.R

context("String operations on sequences")

test_that("Pattern detection works", {
  x <- dna(c("ACGTTAGTGTAGCCGT", "CTCGAAATGA", NA))
  expect_equal(seq_detect_pattern(x, dna("CCG", "AAA", "GGG")),
               c(TRUE, TRUE, NA))
  expect_equal(seq_detect_pattern(x, "^A.{2}T"), c(TRUE, FALSE, NA))
})


test_that("Fuzzy pattern detection works", {
  x <- dna(c("ACGTTAGTGTAGCCGT", "CTCGAAATGA", NA))
  expect_equal(seq_detect_pattern(x, dna("TTA"), max_error = 0.3),
               c(TRUE, FALSE, NA))
  expect_equal(seq_detect_pattern(x, dna("TTA"), max_error = 0.4),
               c(TRUE, TRUE, NA))
  expect_equal(seq_detect_pattern(x, dna("ACGA", "CTA", "GGG"),
                                  max_error = 0.3),
               c(TRUE, FALSE, NA))
  expect_equal(seq_detect_pattern(x, dna("ACGA", "CTA", "GGG"),
                                  max_error = c(0.3, 0.4)),
               c(TRUE, TRUE, NA))

})

test_that("Crop patterns works", {
  x <- dna("ACGTTAAAAAGTGTAGCCCCCGT", "CTCGAAATGA", NA)
  expect_equal(seq_crop_pattern(x, pattern_in = "AAAA", pattern_out = "CCCC",
                                include_patterns = FALSE),
               dna("AGTGTAGC", NA, NA))

  expect_equal(seq_crop_pattern(x, pattern_in = "AAAA", pattern_out = "CCCC",
                                include_patterns = TRUE),
               dna("AAAAAGTGTAGCCCCC", NA, NA))
})


test_that("Fuzzy crop patterns works", {
  x <- dna("TCTCTAAAAAAAAAAAAAAATCTCT")
  a <- dna(NA)
  b <- dna("TAAAAAAAAAAAAAAAT")
  c <- dna("AAAAAAA")
  expect_equal(seq_crop_pattern(x, pattern_in = dna("TACAA"), pattern_out = dna("AAAAT"), max_error_in = 0.1), a)
  expect_equal(seq_crop_pattern(x, pattern_in = dna("TACAA"), pattern_out = dna("AAAAT"), max_error_in = 0.2), b)
  expect_equal(seq_crop_pattern(x, pattern_in = dna("TACAA"), pattern_out = dna("AAAAT"), max_error_in = 0.2, include_patterns = FALSE), c)
  expect_equal(seq_crop_pattern(x, pattern_in = dna("TAAAA"), pattern_out = dna("AGAAT"), max_error_in = 0.2), a)
  expect_equal(seq_crop_pattern(x, pattern_in = dna("TAAAA"), pattern_out = dna("AGAAT"), max_error_out = 0.2), b)
  expect_equal(seq_crop_pattern(x, pattern_in = dna("TAAAA"), pattern_out = dna("AGAAT"), max_error_out = 0.2, include_patterns = FALSE), c)
  expect_equal(seq_crop_pattern(x, pattern_in = dna("TAAAA"), pattern_out = dna("ANAAT"), max_error_out = 0.1), b)
})



test_that("Pattern extraction works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA", NA)
  expect_equal(seq_extract_pattern(x, dna("AAA")),
               list(dna(), dna("AAA"), dna(NA)))
  expect_equal(seq_extract_pattern(x, "T.G"),
               list(dna("TAG", "TAG"), dna("TCG"), dna(NA)))
})


test_that("Pattern deletion works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA", NA)
  expect_equal(seq_remove_pattern(x, dna("AAA")),
               dna("ACGTTAGTGTAGCCGT", "CTCGTGA", NA))
  expect_equal(seq_remove_pattern(x, "^A.{2}T"),
               dna("TAGTGTAGCCGT", "CTCGAAATGA", NA))
})

test_that("Pattern replacement works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA", NA)
  expect_equal(seq_replace_pattern(x, dna("AAA"), dna("GGGGGG")),
               dna("ACGTTAGTGTAGCCGT", "CTCGGGGGGGTGA", NA))
  expect_equal(seq_replace_pattern(x, "^A.{2}T", "TTTTTT"),
               dna("TTTTTTTAGTGTAGCCGT", "CTCGAAATGA", NA))
})


test_that("Pattern split works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA", NA)
  expect_equal(seq_split_pattern(x, dna("AAA")),
               list(dna("ACGTTAGTGTAGCCGT"), dna("CTCG", "TGA"), dna(NA)))
  expect_equal(seq_split_pattern(x, "T.G"),
               list(dna("ACGT", "TG", "CCGT"), dna("C", "AAATGA"), dna(NA)))
})

test_that("Pattern count works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA", NA)
  expect_equal(seq_count_pattern(x, dna("AAA")), c(0, 1, NA))
  expect_equal(seq_count_pattern(x, "T.G"), c(2, 1, NA))
})


test_that("Crop by position works", {
  x <- dna("ACGTTAGTGTAGCCGT")
  expect_equal(seq_crop_position(x, position_in = 4),
               dna("TTAGTGTAGCCGT"))
  expect_equal(seq_crop_position(x, position_in = 4, position_out = 6),
               dna("TTA"))
})

test_that("Remove by position works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA")
  expect_equal(seq_remove_position(x, 2, 6),
               dna("AGTGTAGCCGT", "CATGA"))
  expect_equal(seq_remove_position(x, 1:2, 1:2),
               dna("CGTTAGTGTAGCCGT", "CCGAAATGA"))
})

test_that("Replace by position works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA")
  expect_equal(seq_replace_position(x, c(5, 2), 6, "-------"),
               dna("ACGT-------GTGTAGCCGT", "C-------ATGA"))
})

test_that("Extract by position works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA")
  expect_equal(seq_extract_position(x, 3, 8),
               dna("GTTAGT", "CGAAAT"))
})

test_that("Combine sequences works", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA")
  y <- dna("TTTTTTT", "AAAAAAAAA")
  expect_equal(seq_combine(x, y),
               dna("ACGTTAGTGTAGCCGTTTTTTTT", "CTCGAAATGAAAAAAAAAA"))
  expect_equal(seq_combine(y, x, sep = "CCCCC"),
               dna("TTTTTTTCCCCCACGTTAGTGTAGCCGT", "AAAAAAAAACCCCCCTCGAAATGA"))
  expect_equal(seq_combine(y, x, sep = "CCCCC", collapse = "GGGGG"),
               dna("TTTTTTTCCCCCACGTTAGTGTAGCCGTGGGGGAAAAAAAAACCCCCCTCGAAATGA"))
})


test_that("Combine sequences from two different classes fails", {
  x <- dna("ACGTTAGTGTAGCCGT", "CTCGAAATGA")
  y <- rna("UUUUUG", "AAAAAAAAA")
  expect_error(seq_combine(x, y),
               "Vectors must be of same class to be combined.")
})

test_that("Split kmers works", {
  x <- dna("ACGTT")
  expect_equal(seq_split_kmer(x, k = 3), list(dna("ACG", "CGT", "GTT")))
  expect_warning(seq_split_kmer(x, k = 10), "k was larger than the sequence")
})


test_that("Remove gaps works", {
  x <- dna("ACGTTAGT-------GTAGCCGT", "----CTCGAAA", NA)
  expect_equal(seq_remove_gap(x), dna("ACGTTAGTGTAGCCGT", "CTCGAAA", NA))
})

Try the bioseq package in your browser

Any scripts or data that you put into this service are public.

bioseq documentation built on Sept. 6, 2022, 5:07 p.m.