library(sumrep)
context("Summary statistics")
test_that("test.binContinuousListsAsDiscrete", {
l1 <- 1:20
l2 <- rep(1, 20)
binned <- binContinuousListsAsDiscrete(l1, l2)
bin_a <- binned[[1]]
bin_b <- binned[[2]]
expect_equal(rep(4, 5), bin_a)
expect_equal(c(20, rep(0, 4)), bin_b)
})
test_that("test.compareDistanceFromGermlineToSequenceDistributions", {
naive_a <- c("AAAAAA")
naive_b <- c("GGGGGG")
m1 <- c("AAAAAA")
m2 <- c("AAAAAG")
m3 <- c("GAAAAA")
m4 <- c("AAAGGG")
m5 <- c("GGGG")
dat_a <- data.table(germline_alignment=naive_a, sequence_alignment=list(m1, m2, m3), stop_codon=F, vj_in_frame=T)
dat_b <- data.table(germline_alignment=naive_b, sequence_alignment=list(m2, m3, m4), stop_codon=F, vj_in_frame=T)
c1 <- compareDistanceFromGermlineToSequenceDistributions(dat_a,
dat_a,
approximate=FALSE,
v_gene_only=FALSE
)
c2 <- compareDistanceFromGermlineToSequenceDistributions(dat_a,
dat_b,
approximate=FALSE,
v_gene_only=FALSE
)
c3 <- compareDistanceFromGermlineToSequenceDistributions(dat_b,
dat_a,
approximate=FALSE,
v_gene_only=FALSE)
expect_equal(0, c1)
expect_equal(c3, c2)
expect_true(c2 > 0)
})
test_that("test.compareGCContentDistributions", {
dat_a <- data.table(sequence_alignment=seq_2, stop_codon=F, vj_in_frame=T)
dat_b <- data.table(sequence_alignment=seq_7, stop_codon=F, vj_in_frame=T)
dat_c <- data.table(sequence_alignment=seq_8, stop_codon=F, vj_in_frame=T)
dat_d <- data.table(sequence_alignment=seq_9, stop_codon=F, vj_in_frame=T)
expect_equal(0, compareGCContentDistributions(dat_a, dat_b))
c1 <- compareGCContentDistributions(dat_c, dat_d)
c2 <- compareGCContentDistributions(dat_d, dat_c)
expect_true(c1 > 0)
expect_true(c1 == c2)
})
test_that("test.comparePairwiseDistanceDistributions", {
s1 <- data.table(sequence_alignment=c("AAA", "AAT", "ATT"), stop_codon=F, vj_in_frame=T)
s2 <- data.table(sequence_alignment=c("ATT", "AAA", "AAT"), stop_codon=F, vj_in_frame=T)
s3 <- data.table(sequence_alignment=c("AAA", "AAT", "TTT"), stop_codon=F, vj_in_frame=T)
s4 <- data.table(sequence_alignment=c("AAA", "ATC", "GGG"), stop_codon=F, vj_in_frame=T)
expect_equal(0,
comparePairwiseDistanceDistributions(s1,
s2,
approximate=FALSE
))
c1 <- comparePairwiseDistanceDistributions(s1,
s3,
approximate=FALSE)
c2 <- comparePairwiseDistanceDistributions(s1,
s4,
approximate=FALSE)
expect_true(c1 < c2)
c3 <- comparePairwiseDistanceDistributions(s4,
s1,
approximate=FALSE)
expect_equal(c3, c2)
})
test_that("test.getColdspotCountDistribution", {
seq_a <- "TTTTT"
seq_b <- c("GCC", "AAA", "AAAA")
seq_c <- c("AAA", "CTC")
seq_d <- "SYCSYC"
expect_equal(0,
getColdspotCountDistribution(data.table(junction=seq_a,
stop_codon=F,
vj_in_frame=T
),
column="junction") %>% unname)
expect_equal(c(1/3, 0, 0),
getColdspotCountDistribution(data.table(junction=seq_b,
stop_codon=F,
vj_in_frame=T
),
column="junction") %>% unname)
expect_equal(c(1, 0, 0),
getColdspotCountDistribution(data.table(junction=seq_b,
stop_codon=F,
vj_in_frame=T
),
column="junction",
average_over_sequence_length=FALSE
) %>% unname)
expect_equal(c(0, 1/3),
getColdspotCountDistribution(data.table(junction=seq_c,
stop_codon=F,
vj_in_frame=T
),
column="junction") %>% unname)
expect_equal(4/6,
getColdspotCountDistribution(data.table(junction=seq_d,
stop_codon=F,
vj_in_frame=T
),
column="junction") %>% unname)
})
test_that("test.getDistanceMatrix", {
m <- matrix(0, 2, 2) %>% applyRowAndColumnNames
expect_equal(m, getDistanceMatrix(seq_1))
m2 <- matrix(c(0, 1, 1, 0), 2, 2) %>% applyRowAndColumnNames
expect_equal(m2, getDistanceMatrix(seq_2))
m3 <- matrix(c(0, 4, 4, 0), 2, 2) %>% applyRowAndColumnNames
expect_equal(m3, getDistanceMatrix(seq_3))
m4 <- matrix(0, 2, 2) %>% applyRowAndColumnNames
expect_equal(m4, getDistanceMatrix(seq_4))
})
test_that("test.getDistanceFromGermlineToSequenceDistribution", {
naives <- c("AAAAAC", rep(c("AAAAAA"), 3))
m1 <- c("AAAAAT")
m2 <- c("CGCAAA")
m3 <- c("GGGGGG")
m4 <- c("AAAAAA")
dat <- data.table(germline_alignment=naives, sequence_alignment=c(m1, m2, m3, m4), stop_codon=F, vj_in_frame=T)
expect_equal(c(0, 1, 3, 6), getDistanceFromGermlineToSequenceDistribution(dat,
v_gene_only=FALSE
))
dat$v_gl_seq <- c("AAA", "CGC", "GGG", "AAA")
dat$v_qr_seqs <- rep("AAA", 4)
expect_equal(c(0, 0, 3, 3),
getDistanceFromGermlineToSequenceDistribution(dat,
v_gene_only=TRUE
)
)
})
test_that("test.getDistanceVector", {
expect_equal(0, getDistanceVector(seq_1))
expect_equal(1, getDistanceVector(seq_2))
expect_equal(4, getDistanceVector(seq_3))
expect_equal(0, getDistanceVector(seq_4))
expect_equal(2, getDistanceVector(seq_7))
})
test_that("CDR3 pairwise distances functions", {
dat <- data.table(junction=c("AAACCC", "AAACCC", "ACTCAT"),
junction_aa=c("KP", "KP", "TH"),
stop_codon=F,
vj_in_frame=T
)
dist_1 <- dat %>%
getCDR3PairwiseDistanceDistribution(
approximate=FALSE,
by_amino_acid=FALSE
)
expect_equal(c(0, 4, 4), dist_1)
dist_2 <- dat %>%
getCDR3PairwiseDistanceDistribution(
approximate=FALSE,
by_amino_acid=TRUE
)
expect_equal(c(0, 2, 2), dist_2)
})
test_that("test.getGCContentDistribution", {
d1 <- rep(0, 3)
expect_equal(d1, getGCContentDistribution(dat_1, approximate=FALSE))
d2 <- c(0, 0.75, 1, 0.5)
expect_equal(d2, getGCContentDistribution(dat_2, approximate=FALSE))
})
test_that("test.getGRAVYDistribution", {
# Sample GRAVY values for a few particular amino acids obtained from
# https://github.com/PRIDE-Utilities/pride-utilities/wiki/1.2-GRAVY-Calculations
alanine <- data.table(junction_aa=convertNucleobasesToAminoAcids("GCC"),
stop_codon=F,
vj_in_frame=T
)
glutamine <- data.table(junction_aa=convertNucleobasesToAminoAcids("CAA"),
stop_codon=F,
vj_in_frame=T
)
isoleucine <- data.table(junction_aa=convertNucleobasesToAminoAcids("ATT"),
stop_codon=F,
vj_in_frame=T
)
expect_equal(1.8, getGRAVYDistribution(alanine))
expect_equal(-3.5, getGRAVYDistribution(glutamine))
expect_equal(4.5, getGRAVYDistribution(isoleucine))
expect_equal(c(1.8, -3.5, 4.5),
getGRAVYDistribution(rbind(alanine, glutamine, isoleucine)))
})
test_that("test.getHotspotCountDistribution", {
seq_a <- "TTTTT"
seq_b <- c("AAC", "TTTT")
seq_c <- "NWRC"
seq_d <- c("WRC", "WRC", "WGC")
expect_equal(0, getHotspotCountDistribution(data.table(junction=seq_a,
stop_codon=F,
vj_in_frame=T
),
column="junction") %>% unname)
expect_equal(c(2/3, 0), getHotspotCountDistribution(data.table(junction=seq_b,
stop_codon=F,
vj_in_frame=T
),
column="junction") %>% unname)
expect_equal(c(2, 0), getHotspotCountDistribution(data.table(junction=seq_b,
stop_codon=F,
vj_in_frame=T
),
column="junction",
average_over_sequence_length=FALSE
) %>% unname)
expect_equal(3/4, getHotspotCountDistribution(data.table(junction=seq_c,
stop_codon=F,
vj_in_frame=T
),
column="junction") %>% unname)
expect_equal(c(2/3, 2/3, 1/3), getHotspotCountDistribution(data.table(junction=seq_d,
stop_codon=F,
vj_in_frame=T
),
column="junction") %>% unname)
})
test_that("test.getNearestNeighborDistances", {
d1 <- c(0, 0)
expect_equal(d1, getNearestNeighborDistances(seq_1))
d2 <- c(1, 1, 2)
expect_equal(d2, getNearestNeighborDistances(seq_5) %>% sort)
expect_equal(c(3, 3, 3, 3), getNearestNeighborDistances(seq_6,
k = 1))
expect_equal(c(4, 3, 3, 3), getNearestNeighborDistances(seq_6,
k = 2))
expect_equal(c(4, 4, 4, 3), getNearestNeighborDistances(seq_6,
k = 3))
})
test_that("test.getPositionalPositionalDistanceBetweenMutations", {
germline_alignment <- c("AAAA", "AAAA", "TTTT", "GGGGGGG")
sequence_alignment <- c("AAAA", "ATAG", "TTAT", "CGGCGCG")
expect_equal(getPositionalDistancesBetweenMutationsBySequence(germline_alignment[1], sequence_alignment[1]), NA)
expect_equal(getPositionalDistancesBetweenMutationsBySequence(germline_alignment[2], sequence_alignment[2]), 2)
expect_equal(getPositionalDistancesBetweenMutationsBySequence(germline_alignment[3], sequence_alignment[3]), NA)
expect_equal(getPositionalDistancesBetweenMutationsBySequence(germline_alignment[4], sequence_alignment[4]) %>%
sort,
c(2, 3))
expect_equal(getPositionalDistanceBetweenMutationsDistribution(
data.table(germline_alignment=germline_alignment,
sequence_alignment=sequence_alignment,
stop_codon=F,
vj_in_frame=T
)
) %>% sort,
c(2, 2, 3))
})
test_that("getClusterSizes returns the correct cluster size distribution", {
dat <- data.frame(clone=c(13, 14, 15, 13, 13, 14, 14, 13, 16, 17))
cluster_sizes <- dat %>%
getClusterSizeDistribution(column="clone") %>%
sort
expect_equal(c(1, 1, 1, 3, 4), cluster_sizes)
dat$clone_id <- dat$clone
cluster_sizes <- dat %>%
getClusterSizeDistribution %>%
sort
expect_equal(c(1, 1, 1, 3, 4), cluster_sizes)
expect_equal(getHillNumbers(dat, c(0, 2)), c(5, 3.571429), tolerance=0.001)
})
test_that("getInsertionMatrix functions return correct transition matrices", {
dat <- data.frame(vd_insertion=c("aa", "CC", "xttx", "gga", "xyz"),
dj_insertion=c("aAa", "cc", "hello", "yo", "sup"))
vd_truth <- matrix(0, 4, 4)
rownames(vd_truth) <- c('a', 'c', 'g', 't')
colnames(vd_truth) <- c('a', 'c', 'g', 't')
vd_truth['a', 'a'] <- 0.2
vd_truth['c', 'c'] <- 0.2
vd_truth['t', 't'] <- 0.2
vd_truth['g', 'g'] <- 0.2
vd_truth['g', 'a'] <- 0.2
vd_mat <- dat %>% getVDInsertionMatrix
expect_equal(vd_truth, vd_mat)
dj_truth <- matrix(0, 4, 4)
rownames(dj_truth) <- c('a', 'c', 'g', 't')
colnames(dj_truth) <- c('a', 'c', 'g', 't')
dj_truth['a', 'a'] <- 2/3
dj_truth['c', 'c'] <- 1/3
dj_mat <- dat %>% getDJInsertionMatrix
expect_equal(dj_truth, dj_mat)
})
test_that("getInFramePercentage returns the correct percentage of in-frame sequences", {
dat_a <- data.frame(vj_in_frame=c(TRUE, FALSE, TRUE, FALSE))
dat_b <- data.frame(vj_in_frame=c(TRUE, FALSE, TRUE, TRUE))
expect_equal(getInFramePercentage(dat_a), 50)
expect_equal(getInFramePercentage(dat_b), 75)
expect_equal(compareInFramePercentages(dat_a, dat_b), 25)
})
test_that("Gene usage comparison", {
dat_a <- data.frame(v_call=c("IGHV3-30*18",
"IGHV4-31*03",
"IGHV1-3*01"),
d_call=c("IGHD6-19*01",
"IGHD2-15*01",
"IGHD1-1*01"),
j_call=c("IGHJ6*02",
"IGHJ5*02",
"IGHJ4*02")
)
dat_b <- data.frame(v_call=c("IGHV3-30*18",
"IGHV2-01*03",
"IGHV1-3*02"),
d_call=c("IGHD6-19*02",
"IGHD2-11*01",
"IGHD1-1*02"),
j_call=c("IGHJ6*02",
"IGHJ5*02",
"IGHJ4*02")
)
expect_equal(4, compareVGeneDistributions(dat_a,
dat_b,
collapse_alleles=FALSE,
standardize=FALSE
)
)
expect_equal(4/3, compareVGeneDistributions(dat_a,
dat_b,
collapse_alleles=FALSE,
standardize=TRUE
)
)
expect_equal(2, compareVGeneDistributions(dat_a,
dat_b,
collapse_alleles=TRUE,
standardize=FALSE
)
)
expect_equal(2/3, compareVGeneDistributions(dat_a,
dat_b,
collapse_alleles=TRUE,
standardize=TRUE
)
)
expect_equal(2/3, compareVGeneDistributions(dat_a,
dat_b,
collapse_alleles=TRUE
)
)
expect_equal(6/3, compareDGeneDistributions(dat_a,
dat_b,
collapse_alleles=FALSE
)
)
expect_equal(2/3, compareDGeneDistributions(dat_a,
dat_b,
collapse_alleles=TRUE
)
)
expect_equal(0, compareJGeneDistributions(dat_a,
dat_b,
collapse_alleles=FALSE
)
)
expect_equal(6, compareVDJDistributions(dat_a,
dat_b,
collapse_alleles=FALSE,
by_frequency=FALSE
)
)
expect_equal(2, compareVDJDistributions(dat_a,
dat_b,
collapse_alleles=FALSE,
by_frequency=TRUE
)
)
expect_equal(2/3, compareVDJDistributions(dat_a,
dat_b,
collapse_alleles=TRUE,
by_frequency=TRUE
)
)
expect_equal(2, compareVDJDistributions(dat_a,
dat_b,
collapse_alleles=TRUE,
by_frequency=FALSE
)
)
expect_equal(2/3, compareVJDistributions(dat_a,
dat_b,
collapse_alleles=TRUE,
by_frequency=TRUE
)
)
expect_equal(4/3, compareVJDistributions(dat_a,
dat_b,
collapse_alleles=FALSE,
by_frequency=TRUE
)
)
expect_equal(4, compareVJDistributions(dat_a,
dat_b,
collapse_alleles=FALSE,
by_frequency=FALSE
)
)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.