tests/testthat/test_distance.R

library(kmer)
context("distance computation and tree building")

# simulate a DNA sequence dataset
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
bases <- c("A", "C", "G", "T")
x <- list(sample(bases, replace = TRUE, size = 100))
evolve <- function(a) if(runif(1) > 0.95) sample(bases, 1) else a
for(i in 2:10) x[[i]] <- unname(sapply(x[[i - 1]], evolve))
names(x) <- paste("Sequence", 1:10)
# convert to DNAbin object
rawbases <- as.raw(c(136, 40, 72, 24))
xDNA <- lapply(x, function(s) rawbases[match(s, bases)])
class(xDNA) <- "DNAbin"

# simulate an AA sequence dataset this time an alignment
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
aminos <- LETTERS[-c(2, 10, 15, 21, 24, 26)]
y <- matrix(sample(aminos, replace = TRUE, size = 100), nrow = 1)
evolve <- function(a) if(runif(1) > 0.95) sample(aminos, 1) else a
for(i in 2:10) y <- rbind(y, sapply(y[i - 1,], evolve))
rownames(y) <- paste("Sequence", 1:10)
# convert to AAbin object
rawaminos <- as.raw((65:89)[-c(2, 10, 15, 21, 24, 26)])
yAA <- apply(y, c(1, 2), function(s) rawaminos[match(s, aminos)])
class(yAA) <- "AAbin"

# count k-mers
x.kcounts <- kcount(x)
xDNA.kcounts <- kcount(xDNA)
y.kcounts <- kcount(y, k = 2)
yAA.kcounts <- kcount(yAA, k = 2)

# test Dayhoff compression
tmp <- yAA
tmp[1] <- charToRaw("X")
tmp.kcounts <- kcount(tmp, k = 5)


# generate k-mer distance matrices
x.dist <- kdistance(x, method = "edgar")
xDNA.dist <- kdistance(xDNA, method = "edgar")
y.dist <- kdistance(y, method = "edgar", k = 2)
yAA.dist <- kdistance(yAA, method = "edgar", k = 2)

# embed sequences
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
x.mbed <- mbed(x)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
xDNA.mbed <- mbed(xDNA)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
y.mbed <- mbed(y, k = 2)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
yAA.mbed <- mbed(y, k = 2)

# build divisive trees
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
x.tree <- cluster(x, nstart = 20)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
xDNA.tree <- cluster(xDNA, nstart = 20)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
y.tree <- cluster(y, nstart = 20, k = 2)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
yAA.tree <- cluster(yAA, nstart = 20, k = 2)

# OTU clustering
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
x.otu <- otu(x, nstart = 20)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
xDNA.otu <- otu(xDNA, nstart = 20)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
y.otu <- otu(y, nstart = 20, k = 2)
suppressWarnings(RNGversion("3.5.0"))
set.seed(999)
yAA.otu <- otu(yAA, nstart = 20, k = 2)

test_that("objects have correct classes", {
  expect_is(x.dist, "dist")
  expect_is(y.dist, "dist")
  expect_is(x.mbed, "mbed")
  expect_is(y.mbed, "mbed")
  expect_is(x.tree, "dendrogram")
  expect_is(y.tree, "dendrogram")
})

test_that("character and DNAbin formats give same results", {
  expect_equal(x.kcounts, xDNA.kcounts)
  expect_equal(y.kcounts, yAA.kcounts)
  expect_equal(x.dist, xDNA.dist)
  expect_equal(y.dist, yAA.dist)
  expect_equal(x.mbed[,], xDNA.mbed[,])
  expect_equal(y.mbed[,], yAA.mbed[,])
  expect_equal(x.tree, xDNA.tree)
  expect_equal(y.tree, yAA.tree)
  expect_equal(x.otu, xDNA.otu)
  expect_equal(y.otu, yAA.otu)
})

test_that("object dimensions are correct", {
  expect_equal(ncol(x.kcounts), 1024)
  expect_equal(ncol(y.kcounts), 400)
  expect_equal(length(x.dist), length(y.dist))
  expect_equal(ncol(x.mbed), 9)
  expect_equal(ncol(y.mbed), 9)
  expect_equal(length(x.tree), 2)
  expect_equal(length(y.tree), 2)
  expect_equal(length(x.otu), 10)
  expect_equal(length(y.otu), 10)
})
shaunpwilkinson/kmer documentation built on June 3, 2019, 4:17 a.m.