context("Test general table and field functions")
# read_table --------------------------------------------------------------
test_that("read_table can read all template files", {
dp_templates <- system.file("extdata", "templates", "biosample_attributes",
package = "srahelper")
fps <- list.files(dp_templates, full.names = TRUE)
mds <- lapply(fps, read_table)
# There should be N data frames
mdclass <- sapply(mds, class)
expect_equal(mdclass, rep("data.frame", length(fps)))
# They all have mandatory fields defined
mfclass <- sapply(lapply(mds, attr, "mandatory_fields"), class)
expect_equal(mfclass, rep("character", length(fps)))
})
test_that("read_table reads a Run Info table", {
testthat::skip("test not yet implemented")
fp <- system.file("extdata", "examples", "SraRunTable.txt",
package = "srahelper")
data <- read_table(fp)
})
# write_table -------------------------------------------------------------
check_table_written <- function(data, fp) {
# Rownames will be sample names
rownames(data) <- data$sample_name
# Columns are always character
data$sample_thing1 <- as.character(data$sample_thing1)
# The comments and mandatory fields attributes are always present from
# read_table
attr(data, "comments") <- character()
attr(data, "mandatory_fields") <- character()
data2 <- read_table(fp)
expect_identical(data2, data)
}
test_that("write_table writes metadata fields", {
# Test write_table alongside read_table by ensuring they match.
fp <- tempfile()
data <- setup_sra_table()
write_table(data, fp)
expect_true(file.exists(fp))
check_table_written(data, fp)
})
test_that("write_table handles overwrite options", {
# check that overwrite is off by default and works when on
fp <- tempfile()
# Make sure overwriting is blocked by default
data <- setup_sra_table()
write_table(data, fp)
expect_true(file.exists(fp))
data2 <- read_table(fp)
expect_equal(data2$sample_name[1], "sample1")
expect_error(write_table(data, fp),
paste("Destination file already exists:", fp))
data2 <- read_table(fp)
expect_equal(data2$sample_name[1], "sample1")
# Make sure overwriting works when enabled
data$sample_name[1] <- "newname"
write_table(data, fp, overwrite = TRUE)
data2 <- read_table(fp)
expect_equal(data2$sample_name[1], "newname")
})
test_that("write_table autodetermines output file path", {
fp <- tempfile()
dir.create(fp)
.cwd <- getwd()
setwd(fp)
data <- setup_sra_table()
fp <- write_table(data)
expect_true(file.exists(fp))
expect_equal(fp, "./data.tsv")
check_table_written(data, fp)
})
test_that("write_table uses file suffix given", {
fp <- tempfile()
dir.create(fp)
.cwd <- getwd()
setwd(fp)
data <- setup_sra_table()
fp <- write_table(data, fp_suffix = "stuff")
expect_true(file.exists(fp))
expect_equal(fp, "./stuff.tsv")
check_table_written(data, fp)
})
# check_uniqueness --------------------------------------------------------
test_that("check_uniqueness marks equivalent rows", {
data <- data.frame(sample_name = LETTERS[1:4],
description = letters[1:4])
u_obs <- check_uniqueness(data)
u_exp <- factor(rep("", 4))
expect_equal(u_obs, u_exp)
})
test_that("check_uniqueness marks unique rows", {
data <- setup_sra_table()
u_obs_int <- as.integer(sort(check_uniqueness(data)))
u_exp_int <- 1:5
expect_equal(u_obs_int, u_exp_int)
})
# validate_fields ---------------------------------------------------------
test_that("validate_fields handles biosample attributes", {
# This default has blanks for mandatory fields. There should be a warning
# raised and an entry in the returned vector for each one.
biosamples <- setup_biosamples()
expect_warning(problems <- validate_fields(biosamples))
mf <- attributes(biosamples)$mandatory_fields
blnks <- mf[sapply(mf, function(f) any(blank(biosamples[[f]])))]
problems_expected <- paste("Mandatory field is missing values:", blnks)
expect_equal(names(attributes(problems)), "fields")
expect_equal(length(attributes(problems)[["fields"]]), 11)
expect_equivalent(problems, problems_expected)
# What if a mandatory column is completely missing? Should be the same
# result.
biosamples$organism <- NULL
expect_warning(problems <- validate_fields(biosamples))
expect_equivalent(problems, problems_expected)
# And if we have text in all the mandatory fields? For now it should think
# everything is fine, though really there should be more checks.
biosamples <- setup_biosamples()
biosamples <- fill_blanks(mf, biosamples)
problems <- validate_fields(biosamples)
expect_equivalent(problems, character())
})
test_that("validate_fields handles library metadata", {
# This default has blanks for mandatory fields. There should be a warning
# raised and an entry in the returned vector for each one.
sample_attrs <- setup_sra_table()
col_pairs <- c(title = "sample_name", library_ID = "sample_thing1")
constants <- c(platform = "ILLUMINA")
metadata <- build_metadata(sample_attrs = sample_attrs,
col_pairs = col_pairs,
constants = constants)
expect_warning(problems <- validate_fields(metadata))
mf <- attributes(metadata)$mandatory_fields
blnks <- mf[sapply(mf, function(f) any(blank(metadata[[f]])))]
blnks <- blnks[! blnks %in% c("biosample_accession", "bioproject_accession")]
problems_expected <- paste("Mandatory field is missing values:", blnks)
expect_equivalent(problems, problems_expected)
# What if a mandatory column is completely missing? Should be the same
# result.
metadata$instrument_model <- NULL
expect_warning(problems <- validate_fields(metadata))
expect_equivalent(problems, problems_expected)
# And if we have text in all the mandatory fields?
sample_attrs$filename <- paste0(sample_attrs$sample_name, ".fastq")
col_pairs["library_ID"] <- "sample_name"
constants["library_strategy"] <- "OTHER"
constants["library_source"] <- "OTHER"
constants["library_selection"] <- "RANDOM"
constants["library_layout"] <- "Paired"
constants["instrument_model"] <- "Illumina MiSeq"
constants["filetype"] <- "fastq"
metadata <- build_metadata(sample_attrs = sample_attrs,
col_pairs = col_pairs,
constants = constants)
metadata <- fill_blanks(mf, metadata)
problems <- validate_fields(metadata)
expect_equivalent(problems, character())
})
test_that("validate_fields checks filenames for directories", {
# The processing gets confused server-side if the upload directory has any
# sort of nested structure.
sra_table <- setup_sra_table()
sra_table$filename <- paste0(sra_table$sample_name, ".fastq")
problems <- validate_fields(sra_table)
expect_equivalent(problems, character())
sra_table$filename <- file.path("folder", sra_table$filename)
expect_warning(problems <- validate_fields(sra_table))
problems_expected <- "Filename column contains directory paths: filename"
expect_equivalent(problems, problems_expected)
})
test_that("validate_fields checks instrument model and platform", {
# The instrument model needs to match the platform selected.
sra_table <- setup_sra_table()
# These match up, should be fine.
sra_table$platform <- "ILLUMINA"
sra_table$instrument_model <- "Illumina MiSeq"
problems <- validate_fields(sra_table)
expect_equivalent(problems, character())
# PacBio RS is a valid instrument model but not for ILLUMINA.
sra_table$instrument_model <- "PacBio RS"
expect_warning(problems <- validate_fields(sra_table))
expect_equivalent(problems, "Mismatched instrument_model and platform values")
# These are invalid values for either column.
sra_table$instrument_model <- "does not exist"
expect_warning(problems <- validate_fields(sra_table))
expect_equivalent(problems, "Unknown instrument_model values")
sra_table$platform <- "does not exist"
sra_table$instrument_model <- "PacBio RS"
expect_warning(problems <- validate_fields(sra_table))
expect_equivalent(problems, "Unknown platform values")
})
test_that("validate_fields warns about sample uniqueness", {
# SRA will refuse biosample metadata that it doesn't think distinguishes
# samples well enough.
biosamples <- setup_biosamples()
mf <- attributes(biosamples)$mandatory_fields
biosamples <- setup_biosamples()
biosamples <- fill_blanks(mf, biosamples)
biosamples$host <- 1
expect_warning(problems <- validate_fields(biosamples))
expect_equivalent(problems,
paste("Multiple entries cannot have",
"identical attributes (see ?check_uniqueness)"))
})
test_that("validate_fields can skip warnings", {
sra_table <- setup_sra_table()
sra_table$platform <- "does not exist"
expect_warning(problems <- validate_fields(sra_table))
expect_equivalent(problems, "Unknown platform values")
problems <- validate_fields(sra_table, quiet = TRUE)
expect_equivalent(problems, "Unknown platform values")
})
# fill_blanks -------------------------------------------------------------
test_that("fill_blanks fills blank values in data frame", {
data <- data.frame(A = c("a", "b", "", "c", NA),
B = c("a", "b", "", "c", NA),
stringsAsFactors = FALSE)
result_expected <- data.frame(A = c("a", "b", "missing", "c", "missing"),
B = c("a", "b", "missing", "c", "missing"),
stringsAsFactors = FALSE)
result_observed <- fill_blanks(colnames(data), data)
expect_equal(result_observed, result_expected)
})
test_that("fill_blanks fills blank values in vector", {
vec <- c("a", "b", "", "c", NA)
result_expected <- c("a", "b", "missing", "c", "missing")
result_observed <- fill_blanks(vec)
expect_equal(result_observed, result_expected)
})
test_that("fill_blanks warns of unknown blank text", {
vec <- c("a", "b", "", "c", NA)
result_expected <- c("a", "b", "default", "c", "default")
expect_warning(result_observed <- fill_blanks(vec, value = "default"))
expect_equal(result_observed, result_expected)
})
test_that("fill_blanks ignores factors", {
data <- data.frame(A = c("a", "b", "", "c", NA),
B = c("a", "b", "", "c", NA))
result_expected <- data
result_observed <- fill_blanks(colnames(data), data)
expect_equal(result_observed, result_expected)
})
# datemunge ---------------------------------------------------------------
test_that("datemunge formats dates", {
dates <- c("1/1/2006", "10/5/2009", "12/31/2012")
dates_exp <- c("2006-01-01", "2009-10-05", "2012-12-31")
dates_obs <- datemunge(dates)
expect_equal(dates_obs, dates_exp)
})
test_that("datemunge accepts arbitrary input field ordering", {
# As above, but, D/M/Y
dates <- c("1/1/2006", "5/10/2009", "31/12/2012")
dates_exp <- c("2006-01-01", "2009-10-05", "2012-12-31")
dates_obs <- datemunge(dates, ord_input = c("D", "M", "Y"))
expect_equal(dates_obs, dates_exp)
})
test_that("datemunge rejects invalid input", {
dates <- c("1/1/2006", "10/5/2009", "not a date")
expect_error(datemunge(dates))
dates[3] <- "also/not/date"
expect_error(datemunge(dates))
# Oops, we forgot to specify the field order
dates <- c("1/1/2006", "5/10/2009", "31/12/2012")
expect_error(datemunge(dates))
})
# blank -------------------------------------------------------------------
test_that("blank reports NA or empty string as blank", {
result_expected <- c(TRUE, TRUE, FALSE)
result_observed <- blank(c("", NA, "text"))
expect_equal(result_observed, result_expected)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.