source("testUtils.R")
#########################
context("Distance data")
#########################
test_that("arguments are checked", {
expect_error(distances(matrix(0, nrow = 5, ncol = 5), file = "file"), "specify either matrix or file")
expect_error(distances(), "specify matrix or file")
expect_error(distances(file = 123), "should be a file path")
expect_error(distances(file = "i/do/not/exist"), "does not exist")
expect_error(distances(data = "whoops"), "a matrix or a data frame")
expect_error(distances(matrix(letters[1:25], nrow = 5, ncol = 5)), "should be numeric")
expect_error(distances(matrix(0, nrow = 5, ncol = 5)), "names are required")
m <- matrix(1:25, nrow = 5, ncol = 5)
rownames(m) <- colnames(m) <- 1:5
expect_error(distances(m), "matrix should be symmetric")
})
test_that("class", {
expect_is(distanceData(), "chdist")
expect_is(distanceData()$data, "matrix")
})
test_that("read distance data from file", {
# default dataset
dist <- distanceData()
expect_equal(dist$file, distanceFile())
data <- read.autodelim(distanceFile())
data$NAME <- NULL
matrix <- as.matrix(data)
expect_equal(dist$size, 218)
expect_equal(dist$data, matrix)
expect_equal(dist$ids, getIds())
expect_equal(rownames(dist$data), dist$ids)
expect_equal(colnames(dist$data), dist$ids)
expect_equal(dist$names, getNames())
# small dataset
dist <- distanceData(size = "small")
expected <- matrix(c(
0.0, 0.2, 0.4, 0.6, 0.8,
0.2, 0.0, 0.2, 0.4, 0.6,
0.4, 0.2, 0.0, 0.1, 0.4,
0.6, 0.4, 0.1, 0.0, 0.2,
0.8, 0.6, 0.4, 0.2, 0.0
), nrow = 5, ncol = 5)
rownames(expected) <- colnames(expected) <- getIds(size = "small")
expect_equal(dist$data, expected)
expect_equal(dist$size, 5)
expect_equal(dist$ids, getIds(size = "small"))
expect_equal(dist$names, getNames(size = "small"))
})
test_that("create distance data from matrix", {
# 1: default dataset
data <- read.autodelim(distanceFile())
matrix <- data
matrix$NAME <- NULL
matrix <- as.matrix(matrix)
# as data frame (with names)
dist <- distances(data)
expect_true(is.null(dist$file))
expect_equal(dist$size, 218)
expect_equal(dist$data, matrix)
expect_equal(dist$ids, getIds())
expect_equal(dist$names, getNames())
# as numeric matrix (no explicit names)
dist <- distances(matrix)
expect_true(is.null(dist$file))
expect_equal(dist$size, 218)
expect_equal(dist$data, matrix)
expect_equal(dist$ids, getIds())
expect_equal(dist$names, getIds())
# 2: small dataset
matrix <- matrix(c(
0.0, 0.2, 0.4, 0.6, 0.8,
0.2, 0.0, 0.2, 0.4, 0.6,
0.4, 0.2, 0.0, 0.1, 0.4,
0.6, 0.4, 0.1, 0.0, 0.2,
0.8, 0.6, 0.4, 0.2, 0.0
), nrow = 5, ncol = 5)
rownames(matrix) <- colnames(matrix) <- getIds(size = "small")
# as numeric matrix (no explicit names)
dist <- distances(matrix)
expect_true(is.null(dist$file))
expect_equal(dist$size, 5)
expect_equal(dist$data, matrix)
expect_equal(dist$ids, getIds(size = "small"))
expect_equal(dist$names, getIds(size = "small"))
# as data frame (no explicit names)
dist <- distances(as.data.frame(matrix))
expect_true(is.null(dist$file))
expect_equal(dist$size, 5)
expect_equal(dist$data, matrix)
expect_equal(dist$ids, getIds(size = "small"))
expect_equal(dist$names, getIds(size = "small"))
# as data frame (with names)
data <- cbind(NAME = c(NA, NA, "Bob", "Bob", NA), as.data.frame(matrix))
dist <- distances(data)
expect_true(is.null(dist$file))
expect_equal(dist$size, 5)
expect_equal(dist$data, matrix)
expect_equal(dist$ids, getIds(size = "small"))
expect_equal(dist$names, getNames(size = "small"))
})
test_that("print", {
data <- distanceData()
expect_output(print(data), "# Precomputed distance matrix")
expect_output(print(data), "accessions = 218")
expect_output(print(data), "extdata[\\/]*distances.csv")
data <- distanceData(size = "small")
expect_output(print(data), "# Precomputed distance matrix")
expect_output(print(data), "accessions = 5")
expect_output(print(data), 'Ids: chr \\[1:5\\] "Alice" "Dave" "Bob" "Bob\'" "Carol"')
expect_output(print(data), 'Names: chr \\[1:5\\] "Alice" "Dave" "Bob" "Bob" "Carol"')
expect_output(print(data), "testthat[\\/]*data[\\/]*distances-small.txt")
})
########################
context("Genotype data")
########################
test_that("arguments are checked", {
expect_error(genotypes(), "specify data or file")
expect_error(genotypes(file = 124, format = "default"), "should be a file path")
expect_error(genotypes(file = "i/do/not/exist", format = "biparental"), "does not exist")
expect_error(genotypes(data = testData()$geno$data, file = genotypeFile()), "not both")
expect_error(genotypes(data = "123", format = "default"), "data frame")
expect_error(genotypes(data = "abc", format = "biparental"), "matrix or data frame")
expect_error(genotypes(data = 1:3, format = "frequency"), "matrix or data frame")
m <- matrix(1:100, 10, 10)
expect_error(genotypes(data = m, format = "biparental"), "0, 1, 2")
m <- matrix(sample(c("0","1","2"), replace = TRUE, size = 100), 10, 10)
expect_error(genotypes(data = m, format = "biparental"), "matrix should be numeric")
m <- matrix(sample(c(0,1,2), replace = TRUE, size = 100), 10, 10)
expect_error(genotypes(data = m, format = "biparental"), "row names are required")
rownames(m) <- letters[1:10]
expect_silent(genotypes(data = m, format = "biparental"))
expect_error(genotypes(data = m, format = "freq"), "values between 0.0 and 1.0")
m <- matrix(runif(100), 10, 10)
expect_error(genotypes(data = m, format = "freq"), "row names")
rownames(m) <- letters[1:10]
expect_error(genotypes(data = m, format = "freq"), "column names")
colnames(m) <- letters[1:10]
alleles <- c("x", "y", "z")
expect_error(genotypes(data = m, alleles, format = "freq"), "number of data columns")
df <- data.frame(1:10)
colnames(df) <- NULL
expect_error(genotypes(df, format = "default"), "column names")
m <- matrix(sample(c(0,1,2), replace = TRUE, size = 100), 10, 10)
rownames(m) <- 1:10
expect_error(genotypes(data = m), "specify data format")
})
test_that("class", {
expect_is(genotypeData(), "chgeno")
expect_is(genotypeData()$data, "data.frame")
})
test_that("warned when reading frequency or biparental data as default", {
expect_warning(genotypes(
file = genotypeFile(format = "biparental"),
format = "default"
), "'biparental' format, not 'default'")
expect_warning(genotypes(
file = genotypeFile(format = "biparental", size = "small"),
format = "default"
), "'biparental' format, not 'default'")
expect_warning(genotypes(
file = genotypeFile(format = "frequency"),
format = "default"
), "'frequency' format, not 'default'")
expect_warning(genotypes(
file = genotypeFile(format = "frequency", size = "small"),
format = "default"
), "'frequency' format, not 'default'")
})
test_that("read genotype data from file", {
# 1: default dataset
for(format in c("default", "biparental", "frequency")){
geno <- genotypeData(format = format)
expect_equal(geno$file, genotypeFile(format = format))
expect_equal(geno$size, 218)
expect_equal(geno$ids, getIds())
expect_equal(rownames(geno$data), geno$ids)
expect_equal(geno$names, getNames())
expect_equal(geno$markers, getMarkerNames())
expect_equal(names(geno$alleles), geno$markers)
expect_equal(length(geno$alleles), geno$java$getNumberOfMarkers())
for(m in 1:length(geno$alleles)){
expect_equal(length(geno$alleles[[m]]), geno$java$getNumberOfAlleles(toJavaIndices(m)))
if(format == "default"){ # homozygous test data
expected <- unique(geno$data[,m])
expected <- as.character(expected[!is.na(expected)])
expect_equal(sort(geno$alleles[[m]]), sort(expected))
} else if(format == "biparental"){
expect_equal(geno$alleles[[m]], c("0", "1"))
} else {
expect_true(all(is.na(geno$alleles[[m]])))
}
}
if(format == "frequency" || format == "biparental"){
expect_is(geno$data, "matrix")
expect_true(is.numeric(geno$data))
} else {
expect_is(geno$data, "data.frame")
}
expect_equal(geno$format, format)
}
# 2: small dataset (default format)
geno <- genotypeData(size = "small", format = "default")
expect_equal(geno$size, 5)
expect_equal(geno$ids, getIds(size = "small"))
expect_equal(rownames(geno$data), geno$ids)
expect_equal(geno$names, getNames(size = "small"))
expect_equal(geno$markers, getMarkerNames(size = "small"))
expect_equal(names(geno$alleles), geno$markers)
expect_equal(length(geno$alleles), 4)
expect_equal(geno$alleles[[1]], c("1", "2", "3"))
expect_equal(geno$alleles[[2]], c("A", "B", "C", "D"))
expect_equal(geno$alleles[[3]], c("a1", "a2"))
expect_equal(geno$alleles[[4]], c("+", "-"))
expect_equal(geno$format, "default")
# 3: small dataset (biparental format)
geno <- genotypeData(size = "small", format = "biparental")
expect_equal(geno$size, 5)
expect_equal(geno$ids, getIds(size = "small"))
expect_equal(rownames(geno$data), geno$ids)
expect_equal(geno$names, getNames(size = "small"))
expect_equal(geno$markers, getMarkerNames(size = "small"))
expect_equal(names(geno$alleles), geno$markers)
expect_equal(length(geno$alleles), 4)
for(m in 1:4){
expect_equal(geno$alleles[[m]], c("0", "1"))
}
expect_equal(geno$format, "biparental")
# 4: small dataset (frequency format)
geno <- genotypeData(size = "small", format = "frequency")
expect_equal(geno$size, 5)
expect_equal(geno$ids, getIds(size = "small"))
expect_equal(rownames(geno$data), geno$ids)
expect_equal(geno$names, getNames(size = "small"))
expect_equal(geno$markers, getMarkerNames(size = "small"))
expect_equal(names(geno$alleles), geno$markers)
expect_equal(length(geno$alleles), 4)
expect_equal(geno$alleles[[1]], c("mk1-1", "mk1-2", "mk1-3"))
expect_equal(geno$alleles[[2]], c("mk2-1", "mk2-2"))
expect_equal(geno$alleles[[3]], c(NA, "mk3-2", NA))
expect_equal(geno$alleles[[4]], c("mk4-1", "mk4-2", "mk4-3", "mk4-4"))
expect_equal(geno$format, "frequency")
})
test_that("create default genotype data from data frame", {
ids <- paste("g", 1:5, sep = "-")
names <- c("Alice", "Bob", "Carol", "Dave", "Eve")
df <- data.frame(
NAME = names,
M1.1 = c(1,2,1,2,1),
M1.2 = c(3,2,2,3,1),
M2.1 = c("B","C","D","B",NA),
M2.2 = c("B","A","D","B",NA),
M3.1 = c("a1","a1","a2","a2","a1"),
M3.2 = c("a1","a2","a2","a1","a1"),
M4.1 = c(NA,"+","+","+","-"),
M4.2 = c(NA,"-","+","-","-"),
row.names = ids
)
df.no.names <- df
df.no.names$NAME <- NULL
markers <- c("M1", "M2", "M3", "M4")
alleles <- list(
M1 = as.character(1:3),
M2 = c("A", "B", "C", "D"),
M3 = c("a1", "a2"),
M4 = c("+", "-")
)
# without names
geno <- genotypes(df.no.names, format = "default")
expect_equal(geno$size,5)
expect_equal(geno$ids, ids)
expect_equal(geno$names, ids)
expect_equal(geno$data, df.no.names)
expect_equal(geno$markers, markers)
expect_equal(geno$alleles, alleles)
expect_equal(geno$format, "default")
# with names
geno <- genotypes(df, format = "default")
expect_equal(geno$size,5)
expect_equal(geno$ids, ids)
expect_equal(geno$names, names)
expect_equal(geno$data, df.no.names)
expect_equal(geno$markers, markers)
expect_equal(geno$alleles, alleles)
expect_equal(geno$format, "default")
# from data read from file
geno <- genotypeData(format = "default")
m <- cbind(NAME = geno$names, geno$data)
geno <- genotypes(m, format = "default")
expect_equal(geno$size,218)
expect_equal(geno$ids, getIds())
expect_equal(geno$names, getNames())
expect_equal(geno$data, m[,2:ncol(m)])
expect_equal(geno$markers, getMarkerNames())
expect_equal(names(geno$alleles), geno$markers)
for(marker.alleles in geno$alleles){
expect_equal(marker.alleles, c("A", "B"))
}
expect_equal(geno$format, "default")
})
test_that("create biparental genotype data from matrix or data frame", {
# from matrix with ids and marker names
m <- matrix(
sample(c(0,1,2), replace = TRUE, size = 1000),
nrow = 10, ncol = 100
)
ids <- paste("g", 1:10, sep = "-")
markers <- paste("m", 1:100, sep = "-")
rownames(m) <- ids
colnames(m) <- markers
geno <- genotypes(m, format = "biparental")
expect_equal(geno$size, 10)
expect_equal(geno$ids, ids)
expect_equal(geno$names, ids)
expect_equal(geno$data, m)
expect_equal(geno$markers, markers)
expect_equal(names(geno$alleles), geno$markers)
lapply(geno$alleles, function(markerAlleles){
expect_equal(markerAlleles, c("0", "1"))
})
expect_equal(geno$format, "biparental")
# with ids, no marker names
m2 <- m
colnames(m2) <- NULL
geno <- genotypes(m2, format = "biparental")
expect_equal(geno$data, m2)
expect_equal(geno$markers, as.character(rep(NA, ncol(m2))))
expect_null(names(geno$alleles))
# from data frame with ids, names and marker names
names <- letters[1:10]
df <- data.frame(NAME = names, m, check.names = F)
geno <- genotypes(df, format = "biparental")
expect_equal(geno$size, 10)
expect_equal(geno$ids, ids)
expect_equal(geno$names, names)
expect_equal(geno$data, m)
expect_equal(geno$markers, markers)
expect_equal(names(geno$alleles), geno$markers)
lapply(geno$alleles, function(markerAlleles){
expect_equal(markerAlleles, c("0", "1"))
})
expect_equal(geno$format, "biparental")
# from data frame without names
df <- data.frame(m, check.names = F)
geno <- genotypes(df, format = "biparental")
expect_equal(geno$names, ids)
expect_equal(geno$data, m)
expect_equal(geno$format, "biparental")
# from data frame without names or marker names
df <- data.frame(m, check.names = F)
colnames(df) <- NULL
geno <- genotypes(df, format = "biparental")
expect_equal(geno$data, m2)
expect_equal(geno$names, ids)
expect_true(all(is.na(geno$markers)))
expect_null(names(geno$alleles))
expect_equal(geno$format, "biparental")
# from data frame with names, no marker names
df <- data.frame(NAME = names, m, check.names = F)
colnames(df)[2:ncol(df)] <- NA
geno <- genotypes(df, format = "biparental")
expect_equal(geno$data, m2)
expect_equal(geno$names, names)
expect_true(all(is.na(geno$markers)))
expect_null(names(geno$alleles))
expect_equal(geno$format, "biparental")
# from data read from file
m <- genotypeData(format = "bi")$data
geno <- genotypes(m, format = "bi")
expect_equal(geno$data, m)
expect_equal(geno$ids, getIds())
expect_equal(geno$names, getIds())
expect_equal(geno$markers, getMarkerNames())
expect_equal(names(geno$alleles), geno$markers)
for(marker.alleles in geno$alleles){
expect_equal(marker.alleles, c("0", "1"))
}
expect_equal(geno$format, "biparental")
})
test_that("create frequency data from matrix or data frame", {
# from matrix
m <- matrix(
c(0.0, 0.3, 0.7, 0.5, 0.5, 0.0, 1.0,
0.4, 0.0, 0.6, 0.1, 0.9, 0.0, 1.0,
0.3, 0.3, 0.4, 1.0, 0.0, 0.6, 0.4),
byrow = TRUE, nrow = 3, ncol = 7
)
ids <- paste("g", 1:3, sep = "-")
columns <- c("M1.1", "M1.2", "M1.3", "M2-a", "M2-b", "M3_1", "M3_2")
alleles <- c("M1-a", "M1-b", "M1-c", "M2-a", "M2-b", "M3-a", "M3-b")
rownames(m) <- ids
colnames(m) <- columns
geno <- genotypes(m, alleles, format = "frequency")
expect_equal(geno$size, 3)
expect_equal(geno$ids, ids)
expect_equal(geno$names, ids)
expect_equal(geno$data, m)
expect_equal(geno$markers, c("M1", "M2", "M3"))
expect_equal(names(geno$alleles), geno$markers)
expect_equal(geno$alleles$M1, alleles[1:3])
expect_equal(geno$alleles$M2, alleles[4:5])
expect_equal(geno$alleles$M3, alleles[6:7])
expect_equal(geno$format, "frequency")
# from data frame without names
df <- data.frame(m, check.names = FALSE)
geno <- genotypes(df, alleles, format = "freq")
expect_equal(geno$data, m)
expect_equal(geno$format, "frequency")
# from data frame with names
names <- letters[1:3]
df <- data.frame(NAME = names, m, check.names = FALSE)
geno <- genotypes(df, alleles, format = "freq")
expect_equal(geno$data, m)
expect_equal(geno$ids, ids)
expect_equal(geno$names, names)
expect_equal(geno$format, "frequency")
# from matrix without allele names
geno <- genotypes(m, format = "freq")
expect_equal(geno$data, m)
expect_equal(names(geno$alleles), geno$markers)
expect_equal(geno$alleles$M1, as.character(rep(NA, 3)))
expect_equal(geno$alleles$M2, as.character(rep(NA, 2)))
expect_equal(geno$alleles$M3, as.character(rep(NA, 2)))
expect_equal(geno$format, "frequency")
# from data read from file
m <- genotypeData(format = "freq")$data
geno <- genotypes(m, format = "freq")
expect_equal(geno$data, m)
expect_equal(geno$ids, getIds())
expect_equal(geno$names, getIds())
expect_equal(geno$markers, getMarkerNames())
expect_equal(names(geno$alleles), geno$markers)
for(marker.alleles in geno$alleles){
expect_equal(marker.alleles, as.character(rep(NA, 2)))
}
expect_equal(geno$format, "frequency")
})
test_that("print", {
data <- genotypeData(format = "default")
expect_output(print(data), "# Genotypes")
expect_output(print(data), "accessions = 218")
expect_output(print(data), "markers = 190")
expect_output(print(data), "alleles per marker = 2")
expect_output(print(data), "Format = default")
expect_output(print(data), "extdata[\\/]*genotypes.csv")
data <- genotypeData(format = "biparental")
expect_output(print(data), "# Genotypes")
expect_output(print(data), "accessions = 218")
expect_output(print(data), "markers = 190")
expect_output(print(data), "alleles per marker = 2")
expect_output(print(data), "Format = biparental")
expect_output(print(data), "extdata[\\/]*genotypes-biparental.csv")
data <- genotypeData(format = "freq")
expect_output(print(data), "# Genotypes")
expect_output(print(data), "accessions = 218")
expect_output(print(data), "markers = 190")
expect_output(print(data), "alleles per marker = 2")
expect_output(print(data), "Format = frequency")
expect_output(print(data), "extdata[\\/]*genotypes-frequency.csv")
data <- genotypeData(format = "default", size = "small")
expect_output(print(data), "# Genotypes")
expect_output(print(data), "accessions = 5")
expect_output(print(data), 'Ids: chr \\[1:5\\] "Alice" "Dave" "Bob" "Bob\'" "Carol"')
expect_output(print(data), 'Names: chr \\[1:5\\] "Alice" "Dave" "Bob" "Bob" "Carol"')
expect_output(print(data), "markers = 4")
expect_output(print(data), 'Marker names: chr \\[1:4\\] "mk1" "mk,2" "mk\'3" "mk4"')
expect_output(print(data), "alleles per marker = 2-4")
expect_output(print(data), "Format = default")
expect_output(print(data), "testthat[\\/]*data[\\/]*genotypes-small.csv")
data <- genotypeData(format = "biparental", size = "small")
expect_output(print(data), "# Genotypes")
expect_output(print(data), "accessions = 5")
expect_output(print(data), 'Ids: chr \\[1:5\\] "Alice" "Dave" "Bob" "Bob\'" "Carol"')
expect_output(print(data), 'Names: chr \\[1:5\\] "Alice" "Dave" "Bob" "Bob" "Carol"')
expect_output(print(data), "markers = 4")
expect_output(print(data), 'Marker names: chr \\[1:4\\] "mk1" "mk,2" "mk\'3" "mk4"')
expect_output(print(data), "alleles per marker = 2")
expect_output(print(data), "Format = biparental")
expect_output(print(data), "testthat[\\/]*data[\\/]*genotypes-bi-small.csv")
data <- genotypeData(format = "freq", size = "small")
expect_output(print(data), "# Genotypes")
expect_output(print(data), "accessions = 5")
expect_output(print(data), 'Ids: chr \\[1:5\\] "Alice" "Dave" "Bob" "Bob\'" "Carol"')
expect_output(print(data), 'Names: chr \\[1:5\\] "Alice" "Dave" "Bob" "Bob" "Carol"')
expect_output(print(data), "markers = 4")
expect_output(print(data), 'Marker names: chr \\[1:4\\] "mk1" "mk,2" "mk\'3" "mk4"')
expect_output(print(data), "alleles per marker = 2-4")
expect_output(print(data), "Format = frequency")
expect_output(print(data), "testthat[\\/]*data[\\/]*genotypes-freq-small.csv")
})
#########################
context("Phenotype data")
#########################
test_that("arguments are checked", {
expect_error(phenotypes(), "path is required")
expect_error(phenotypes(file = 124), "should be a file path")
expect_error(phenotypes(file = "i/do/not/exist"), "does not exist")
expect_error(phenotypes(data = data.frame(), file = phenotypeFile()), "either data frame or file")
expect_error(phenotypes(file = "data/phenotypes-no-types.csv"), "types are required")
expect_error(phenotypes(data = 456), "should be a data frame")
# no column ids
df <- data.frame(1:10, letters[1:10])
colnames(df) <- NULL
expect_error(phenotypes(data = df), "names are required")
# invalid type
df <- data.frame(1:10, letters[1:10])
expect_error(phenotypes(df, types = c("I", "N", "R")), "does not correspond to number of data columns")
expect_error(phenotypes(df, types = c("I", "NSS")), "one or two characters")
expect_error(phenotypes(df, types = c("X", "Y")), "unknown scale", ignore.case = TRUE)
expect_error(phenotypes(file = "data/phenotypes-unknown-type.csv"), "unsupported variable type", ignore.case = TRUE)
# no auto type
df <- data.frame(rep(as.Date("2016-06-27"), 10), letters[1:10])
expect_error(phenotypes(df), "infer variable type")
# invalid ranges
df <- data.frame(1:10, letters[1:10])
expect_error(phenotypes(df, min = c("a", NA)), "should be numeric")
expect_error(phenotypes(df, max = c(F, NA)), "should be numeric")
expect_error(phenotypes(df, min = c(1, NA, 10)), "number of data columns")
expect_error(phenotypes(df, max = c(5, NA, 100)), "number of data columns")
})
test_that("class", {
expect_is(phenotypeData(), "chpheno")
expect_is(phenotypeData()$data, "data.frame")
})
test_that("read phenotype data from file", {
# 1: default dataset
pheno <- phenotypeData()
expect_equal(pheno$file, phenotypeFile())
expect_equal(pheno$size, 218)
expect_equal(pheno$ids, getIds())
expect_equal(rownames(pheno$data), pheno$ids)
expect_equal(pheno$names, getNames())
expect_equal(pheno$ranges, getRanges())
# check average Gower distance of all individuals without missing data
# (Core Hunter treats missing data slightly differently compared to StatMatch)
no.missing.data <- which(!apply(is.na(pheno$data), 1, any))
gd <- StatMatch::gower.dist(pheno$data[no.missing.data, ], rngs = pheno$ranges)
gd <- gd[lower.tri(gd)]
expect_equal(mean(gd), evaluateCore(no.missing.data, pheno, objective("EE", "GD")))
# 2: small dataset
pheno <- phenotypeData(size = "small")
expect_equal(pheno$size, 5)
expect_equal(pheno$ids, getIds(size = "small"))
expect_equal(rownames(pheno$data), pheno$ids)
expect_equal(pheno$names, getNames(size = "small"))
expect_equal(pheno$ranges, getRanges(size = "small"))
for(r in 1:pheno$size){
# extract row from Java object
row <- pheno$java$getRow(as.integer(r-1))
# compare values
for(trait in 1:ncol(pheno$data)){
R.value <- pheno$data[[trait]][r]
Java.value <- row$getValue(as.integer(trait-1))
expect_equal(as.character(Java.value), as.character(R.value))
}
}
# check average Gower distance (no missing data in small dataset)
gd <- StatMatch::gower.dist(pheno$data, rngs = pheno$ranges)
gd <- gd[lower.tri(gd)]
expect_equal(mean(gd), evaluateCore(1:5, pheno, objective("EE", "GD")))
})
test_that("phenotype file with single trait", {
pheno.file <- "data/phenotypes-single.csv"
pheno <- phenotypes(file = pheno.file)
expect_equal(pheno$file, normalizePath(pheno.file))
expect_equal(pheno$size, 10)
expect_equal(pheno$ids, as.character(1:10))
expect_equal(rownames(pheno$data), pheno$ids)
expect_equal(pheno$names, c("ABBUOTO", "ABRUSCO", "AGLIANICO", "ALBANA", "ALBANELLO", "ALBAROSSA",
"UVA_MELONA", "ALBARANZEULI_BIANCO", "ALEATICO", "ALICANTE_BOUSCHET"))
expect_equal(pheno$ranges, 17.5)
})
test_that("create phenotype data from data frame", {
# three different ways to compute Gower distance matrix
gd <- function(df){
pheno <- phenotypes(df)
StatMatch::gower.dist(df, rngs = pheno$ranges, KR.corr = FALSE)
}
gd2 <- function(df){
pheno <- phenotypes(df)
StatMatch::gower.dist(pheno$data, rngs = pheno$ranges, KR.corr = FALSE)
}
gd3 <- function(df){
pheno <- phenotypes(df)
m <- matrix(0.0, pheno$size, pheno$size)
for(i in 1:pheno$size){
if(i < pheno$size){
for(j in (i+1):pheno$size){
m[i,j] <- m[j,i] <- evaluateCore(c(i,j), pheno, objective("EE", "GD"))
}
}
}
return(m)
}
# create data frame
df <- data.frame(
n = sample(letters[1:10], size = 5, replace = TRUE),
i = sample(1:10, size = 5, replace = TRUE),
o = ordered(sample(letters[1:10], size = 5, replace = TRUE), levels = letters[1:10]),
r = rnorm(5),
b = sample(c(T,F), size = 5, replace = TRUE)
)
# create phenotype data with automatic types and ranges
pheno <- phenotypes(df)
expect_equal(pheno$size, 5)
expect_equal(pheno$ids, as.character(1:5))
expect_equal(pheno$names, as.character(1:5))
expect_equal(pheno$data$o, as.integer(df$o))
expect_equal(pheno$types, c("N", "I", "I", "R", "NB"))
expect_equal(pheno$ranges, c(NA, max(df$i) - min(df$i), 9, max(df$r) - min(df$r), NA))
expect_equal(gd(df), gd2(df))
expect_equal(gd(df), gd3(df))
# same but with first column as character instead of factor
df$n <- as.character(df$n)
pheno.factor <- pheno
pheno.character <- phenotypes(df)
expect_equal(pheno.character, pheno.factor)
# explicit types (all nominal string)
pheno <- phenotypes(df, types = rep("NS", 5))
for(c in 1:ncol(df)){
expect_is(pheno$data[[c]], "factor")
}
expect_equal(pheno$types, rep("NS", 5))
expect_equal(pheno$ranges, as.numeric(rep(NA, 5)))
expect_equal(gd(df), gd2(df))
expect_equal(gd(df), gd3(df))
# with dates
df.dates <- df
df.dates$dates <- format(as.Date(c(
"2016-06-01",
"2016-06-05",
"2016-06-02",
"2016-06-03",
"2016-06-04"
)), format = "%Y%m%d%H%M%S%z")
pheno <- phenotypes(df.dates, types = c(rep(NA, 5), "OA"))
expect_equal(pheno$types[6], "OA")
expect_equal(gd(df), gd2(df))
expect_equal(gd(df), gd3(df))
# with names
df <- cbind(NAME = letters[1:5], df, stringsAsFactors = FALSE)
pheno <- phenotypes(df)
expect_equal(pheno$names, letters[1:5])
# single trait
df <- df[, "r", drop = FALSE]
pheno <- phenotypes(df)
expect_equal(pheno$size, 5)
expect_equal(pheno$ids, as.character(1:5))
expect_equal(pheno$names, as.character(1:5))
expect_equal(pheno$types, "R")
expect_equal(pheno$ranges, max(df$r) - min(df$r))
})
test_that("print", {
data <- phenotypeData()
expect_output(print(data), "# Phenotypes")
expect_output(print(data), "accessions = 218")
expect_output(print(data), "traits = 4")
expect_output(print(data), 'Traits: "GY" "PHT" "EHT" "AD"')
expect_output(print(data), 'Quantitative traits: "GY" "PHT" "EHT" "AD"')
expect_output(print(data), 'Qualitative traits: n/a')
expect_output(print(data), "extdata[\\/]*phenotypes.csv")
data <- phenotypeData(size = "small")
expect_output(print(data), "# Phenotypes")
expect_output(print(data), "accessions = 5")
expect_output(print(data), "traits = 5")
expect_output(print(data), 'Traits: "trait 1" "trait 2" "trait 3" "trait 4" "trait 5"')
expect_output(print(data), 'Quantitative traits: "trait 2" "trait 3" "trait 4"')
expect_output(print(data), 'Qualitative traits: "trait 1" "trait 5"')
expect_output(print(data), "testthat[\\/]*data[\\/]*phenotypes-small.csv")
})
###########################
context("Core Hunter data")
###########################
test_that("arguments are checked", {
expect_error(coreHunterData(), "specify at least one")
expect_error(coreHunterData(distances = list(1, "a")), "class 'chdist'")
expect_error(coreHunterData("x"), "class 'chgeno'")
expect_error(coreHunterData(phenotypes = 123), "class 'chpheno'")
expect_error(coreHunterData(distances = "123"), "class 'chdist'")
})
test_that("class", {
expect_is(testData(), "chdata")
expect_is(testData()$dist, "chdist")
expect_is(testData()$geno, "chgeno")
expect_is(testData()$pheno, "chpheno")
})
test_that("distance matrix", {
data <- read.autodelim(distanceFile())
data$NAME <- NULL
matrix <- as.matrix(data)
expect_equal(testData()$dist$data, matrix)
})
test_that("size", {
expect_equal(testData()$size, 218)
})
test_that("example data", {
data <- exampleData()
expect_equal(data$dist, distanceData())
expect_equal(data$geno, genotypeData(format = "biparental"))
expect_equal(data$pheno, phenotypeData())
})
test_that("print", {
data <- testData()
expect_output(print(data), "Core Hunter data")
expect_output(print(data), "accessions = 218")
expect_output(print(data), "# Genotypes")
expect_output(print(data), "# Phenotypes")
expect_output(print(data), "# Precomputed distance matrix")
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.