###############################################################################
# Unit tests for the classes defined in the `dupree` package.
context("Tests for classes in `dupree` package")
###############################################################################
# Note: integer sequences that are aligned by stringdist::seq_sim are built
# explicitly (e.g., seq(4) or as.integer(c(1, 2, 3, 4))) rather than with 1:4.
# There is an issue with lazily-evaluated values in tests on stringdist; see
# the stringdist NEWS entry for version 0.9.5.4.
###############################################################################
# Construction and validity checks for the `EnumeratedCodeTable` class:
#   - a default object can be built and has the expected class;
#   - a `blocks` table that lacks the required columns is rejected;
#   - the `blocks` slot holds the empty default table, or the table that was
#     supplied at construction.
# `expect_equal_tbl` is a test helper that is not defined in this file.
test_that("EnumeratedCodeTable: construction / validity", {
  expect_is(
    new("EnumeratedCodeTable"),
    "EnumeratedCodeTable",
    info = "Constructor for EnumeratedCodeTable"
  )

  # `tibble::tibble()` is namespace-qualified, like every other tibble call in
  # this file. An unqualified `tibble()` that cannot be resolved raises its own
  # "could not find function" error, which would satisfy `expect_error()`
  # without the class validity check ever being reached.
  expect_error(
    new("EnumeratedCodeTable", blocks = tibble::tibble()),
    info = paste(
      "EnumeratedCodeTable should have `file`, `block`, `start_line` and",
      "`enumerated_code` columns"
    )
  )

  # A zero-row table with the four required columns
  default_blocks <- tibble::tibble(
    file = character(0), block = integer(0), start_line = integer(0),
    enumerated_code = list()
  )
  expect_equal_tbl(
    new("EnumeratedCodeTable")@blocks,
    default_blocks,
    info = paste(
      "Default 'blocks' entry should have no rows, and have",
      "file|block|start_line|enumerated_code columns"
    )
  )

  # NOTE(review): `block` and `start_line` are doubles here, whereas the
  # default table above uses integer columns - confirm this is intended.
  my_blocks <- tibble::tibble(
    file = "a",
    block = 1,
    start_line = 1,
    enumerated_code = list(as.integer(c(1, 2, 3, 4, 5)))
  )
  expect_equal_tbl(
    new("EnumeratedCodeTable", my_blocks)@blocks,
    my_blocks,
    info = "'blocks' entry should match the defining data-frame"
  )
})
###############################################################################
test_that("EnumeratedCodeTable: find_best_match_for_single_block", {
# TODO: add expectations for `find_best_match_for_single_block`; this test is
# currently an empty placeholder.
})
test_that("EnumeratedCodeTable: find_best_matches", {
  # Contract under test: each block contributes at most one best hit.
  #
  # If block X is the best match for block Y and Y is also the best match for
  # X, only one row should be reported, because rows such as
  #
  #   file_a file_b block_a block_b line_a line_b score
  #   X      X      1       2       10     20     0
  #   X      X      2       1       20     10     0       (repeats row above)
  #   X      Y      30      10      350    150    0.5236
  #   Y      X      10      30      150    350    0.5236  (repeats row above)
  #
  # carry no extra information. For such mirrored pairs the file / block
  # values are expected to be sorted alphanumerically, so that
  # file_a <= file_b and block_a <= block_b.

  # ---- Two blocks in the same file with no symbols in common ----------------
  disjoint_blocks <- tibble::tibble(
    file = "a",
    block = c(1L, 2L),
    start_line = c(1L, 2L),
    enumerated_code = list(c(1L, 2L, 3L), 4L)
  )
  disjoint_table <- new("EnumeratedCodeTable", disjoint_blocks)
  disjoint_expected <- tibble::tibble(
    file_a = "a", file_b = "a",
    block_a = 1L, block_b = 2L,
    line_a = 1L, line_b = 2L,
    score = 0
  )
  expect_equal(
    find_best_matches(disjoint_table),
    disjoint_expected,
    info = "find_best_matches on two distinct code-blocks (same file)"
  )

  # Same two blocks, supplied in the opposite row order
  disjoint_reversed <- disjoint_blocks[c(2L, 1L), ]
  expect_equal(
    find_best_matches(new("EnumeratedCodeTable", disjoint_reversed)),
    find_best_matches(disjoint_table),
    info = paste(
      "when two blocks are mutually-best-matches from the same file,",
      "return a single row (we use block_a <= block_b)"
    )
  )

  # ---- Two identical blocks in the same file --------------------------------
  twin_blocks <- tibble::tibble(
    file = "a",
    block = c(1L, 2L),
    start_line = c(1L, 2L),
    enumerated_code = list(c(1L, 2L, 3L), c(1L, 2L, 3L))
  )
  twin_expected <- tibble::tibble(
    file_a = "a", file_b = "a",
    block_a = 1L, block_b = 2L,
    line_a = 1L, line_b = 2L,
    score = 1
  )
  expect_equal(
    find_best_matches(new("EnumeratedCodeTable", twin_blocks)),
    twin_expected,
    info = "find_best_matches on two identical code-blocks (same file)"
  )

  # ---- Partially overlapping blocks in different files ----------------------
  # Longest-common-subsequence distance between 1-2-3-4 and 3-4-5-6 is 4, the
  # summed length is 8, so the similarity is 1 - 4 / 8 = 0.5
  overlap_blocks <- tibble::tibble(
    file = c("a", "b"),
    block = 1L,
    start_line = 1L,
    enumerated_code = list(c(1L, 2L, 3L, 4L), c(3L, 4L, 5L, 6L))
  )
  overlap_table <- new("EnumeratedCodeTable", overlap_blocks)
  overlap_expected <- tibble::tibble(
    file_a = "a", file_b = "b",
    block_a = 1L, block_b = 1L,
    line_a = 1L, line_b = 1L,
    score = 1 / 2
  )
  expect_equal(
    find_best_matches(overlap_table),
    overlap_expected,
    info = "find_best_matches on non-equal code-blocks (LCS; different file)"
  )

  # Same two blocks, supplied in the opposite row order
  overlap_reversed <- overlap_blocks[c(2L, 1L), ]
  expect_equal(
    find_best_matches(new("EnumeratedCodeTable", overlap_reversed)),
    find_best_matches(overlap_table),
    info = paste(
      "when two blocks are mutually-best-matches from different files,",
      "return a single row (we use file_a <= file_b alphanumerically)"
    )
  )

  # ---- One block only: nothing to compare against ---------------------------
  lone_block <- tibble::tibble(
    file = "a",
    block = 1L,
    start_line = 1L,
    enumerated_code = list(c(1L, 2L, 3L, 4L))
  )
  no_matches <- tibble::tibble(
    file_a = character(0), file_b = character(0),
    block_a = integer(0), block_b = integer(0),
    line_a = integer(0), line_b = integer(0),
    score = numeric(0)
  )
  expect_equal(
    find_best_matches(new("EnumeratedCodeTable", lone_block)),
    no_matches,
    info = paste(
      "A single code-block can't be compared to anything: results should be",
      "empty"
    )
  )

  # ---- Three blocks: no cycles in the results -------------------------------
  # When A-B, B-A and C-A are the optimal pairings, B-C must not be reported.
  #   A) 1-2-3-4-5
  #   B) 1-2-3-6-5   (distance A-B: 2)
  #   C) 7-2-3-4-8   (distance A-C: 4; distance B-C: 6)
  trio_blocks <- tibble::tibble(
    file = "a",
    block = c(1L, 2L, 3L),
    start_line = c(1L, 2L, 3L),
    enumerated_code = list(
      c(1L, 2L, 3L, 4L, 5L),
      c(1L, 2L, 3L, 6L, 5L),
      c(7L, 2L, 3L, 4L, 8L)
    )
  )
  trio_expected <- tibble::tibble(
    file_a = "a", file_b = "a",
    block_a = c(1L, 1L), block_b = c(2L, 3L),
    line_a = 1L, line_b = c(2L, 3L),
    score = c(1 - 2 / 10, 1 - 4 / 10)
  )
  expect_equal(
    find_best_matches(new("EnumeratedCodeTable", trio_blocks)),
    trio_expected,
    info = "no cycles should be present in the results (by default)"
  )
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.