Nothing
test_that("multiplication works", {
testthat::expect_true(is.list(kmers("ACTGC")))
})
genomes_dir <- file.path("fixtures", "genomes")
test_that("non canonical kmer counting is correct", {
genomes_filepaths <- list.files(genomes_dir, full.names = TRUE)
genomes <- lapply(genomes_filepaths, Biostrings::readDNAStringSet)
tmp_dir <- tempdir()
unlink(tmp_dir, recursive = TRUE)
genomes_to_kmer_libsvm(genomes_dir, tmp_dir, k = 2, canonical = FALSE)
kmer_files <- list.files(tmp_dir, full.names = TRUE)
# read last file and print
kmer_data <- readLines(kmer_files[4])
kmer_data <- unlist(strsplit(kmer_data, " "))
assert_str <- c("0", "1:3", "2:2", "3:1", "4:2",
"5:1", "7:4", "8:6",
"9:1", "10:4",
"13:2", "14:6", "16:1")
expect_true(all.equal(kmer_data, assert_str))
})
test_that("canonical kmer counting is correct", {
genome_str <- "ACACTT"
kmers <- kmers(genome_str, k = 2, canonical = TRUE, anchor = FALSE)
expect_true(all.equal(kmers, list(kmer_string = c("AA", "AC", "AG", "CA"),
kmer_value = c(1, 2, 1, 1))))
})
test_that("test kmer squeeze", {
for (k in 3:8) {
all_mers <- kmers("AAAAAAAAAAAAAAA", k = k, anchor = TRUE)
all_mers <- all_mers$kmer_string
all_mers <- paste0(all_mers, collapse = "")
all_counts <- kmers(all_mers, k = k, anchor = TRUE)
filtered_mers <- all_counts$kmer_string[all_counts$kmer_value > 0]
squeezed_mers <- kmers("AAAAAAAAAAAAAAA", k = k, anchor = TRUE, squeeze = TRUE)
squeezed_mers <- squeezed_mers$kmer_string
expect_true(all.equal(filtered_mers, squeezed_mers))
}
})
test_that("test kmer to libsvm squeeze", {
use_genome <- 6
for (k in 3:8) {
# calculate without squeeze first - use genomes_to_kmer_libsvm()
genomes_filepaths <- list.files(genomes_dir, full.names = TRUE)
tmp_dir <- tempdir()
unlink(tmp_dir, recursive = TRUE)
genomes_to_kmer_libsvm(genomes_dir, tmp_dir, k = k, canonical = TRUE, squeeze = FALSE)
kmer_files <- list.files(tmp_dir, full.names = TRUE)
kmer_data <- kmer_files[use_genome] |>
readLines() |>
strsplit(" ") |>
unlist()
# calculate without squeeze - using kmers() directly
genome_str <- genomes_filepaths[use_genome] |>
Biostrings::readDNAStringSet() |>
as.character()
kmer_data2 <- kmers(genome_str, k = k, canonical = TRUE, squeeze = FALSE)
# get the index of the last recorded kmer:
last_kmer <- max(which(kmer_data2$kmer_value > 0))
# check this is the same as using the libsvm file
last_kmer_libsvm <- max(as.numeric(strsplit(kmer_data[length(kmer_data)], ":")[[1]][1]))
expect_true(last_kmer == last_kmer_libsvm)
# now calculate with squeeze
unlink(tmp_dir, recursive = TRUE)
genomes_to_kmer_libsvm(genomes_dir, tmp_dir, k = k, canonical = TRUE, squeeze = TRUE)
kmer_files <- list.files(tmp_dir, full.names = TRUE)
kmer_data <- kmer_files[use_genome] |>
readLines() |>
strsplit(" ") |>
unlist()
# calculate with squeeze - using kmers() directly
kmer_data2 <- kmers(genome_str, k = k, canonical = TRUE, squeeze = TRUE)
# get the index of the last recorded kmer:
last_kmer <- max(which(kmer_data2$kmer_value > 0))
# check this is the same as using the libsvm file
last_kmer_libsvm <- max(as.numeric(strsplit(kmer_data[length(kmer_data)], ":")[[1]][1]))
expect_true(last_kmer == last_kmer_libsvm)
}
})
test_that("check that can revert the squeeze", {
use_genome <- 6
for (k in 3:8) {
squeezed_mers <- squeezed_mers(k = k)
calculated_mers <- kmers("AAAAAAAAAAAAAAA", k = k, anchor = TRUE, squeeze = TRUE)
expect_true(all.equal(squeezed_mers, calculated_mers$kmer_string))
genomes_filepaths <- list.files(genomes_dir, full.names = TRUE)
tmp_dir <- tempdir()
unlink(tmp_dir, recursive = TRUE)
genomes_to_kmer_libsvm(genomes_dir, tmp_dir, k = k, canonical = TRUE, squeeze = FALSE)
kmer_files <- list.files(tmp_dir, full.names = TRUE)
kmer_data_unsqueezed <- kmer_files[use_genome] |>
readLines() |>
strsplit(" ") |>
unlist()
# calculate with squeeze
unlink(tmp_dir, recursive = TRUE)
genomes_to_kmer_libsvm(genomes_dir, tmp_dir, k = k, canonical = TRUE, squeeze = TRUE)
kmer_files <- list.files(tmp_dir, full.names = TRUE)
kmer_data_squeezed <- kmer_files[use_genome] |>
readLines() |>
strsplit(" ") |>
unlist()
expect_false(isTRUE(all.equal(kmer_data_unsqueezed, kmer_data_squeezed)))
# now unsqueeze
squeezed_indices <- kmer_data_squeezed[2:length(kmer_data_squeezed)]
squeezed_indices <- sapply(squeezed_indices, \(x) strsplit(x, ":")[[1]][[1]], USE.NAMES = F)
squeezed_indices <- as.numeric(squeezed_indices)
squeezed_str <- squeezed_index_to_str(squeezed_indices, k = k)
# compare to unsqueezed data
unsqueezed_indices <- kmer_data_unsqueezed[2:length(kmer_data_unsqueezed)]
unsqueezed_indices <- sapply(unsqueezed_indices, \(x) strsplit(x, ":")[[1]][[1]], USE.NAMES = F)
unsqueezed_indices <- as.numeric(unsqueezed_indices)
unsqueezed_str <- unsqueezed_index_to_str(unsqueezed_indices, k = k)
expect_true(all.equal(squeezed_str, unsqueezed_str))
}
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.