tests/testthat/test-IO.R

################################################################################
# Use testthat to test file import and resulting class (and values)
################################################################################
library("phyloseq"); library("testthat")
# # # # TESTS!

################################################################################
# import_mothur tests
mothlist  <- system.file("extdata", "esophagus.fn.list.gz", package="phyloseq")
mothgroup <- system.file("extdata", "esophagus.good.groups.gz", package="phyloseq")
mothtree  <- system.file("extdata", "esophagus.tree.gz", package="phyloseq")
cutoff    <- "0.10"
esophman  <- import_mothur(mothlist, mothgroup, mothtree, cutoff)	
# mothur "Shared" file, create with mothur from these example data files
mothshared = system.file("extdata", "esophagus.fn.shared.gz", package="phyloseq")
constaxonomy = system.file("extdata", "mothur_example.cons.taxonomy.gz", package="phyloseq")

test_that("import_mothur: import of esophagus dataset from mothur files in extdata/ produces a phyloseq object", {
	expect_that(esophman, is_a("phyloseq"))
})

test_that("import_mothur: The two phyloseq objects, example and just-imported, are identical", {
	data("esophagus")
	expect_that(esophagus, is_equivalent_to(esophman))
})

test_that("import_mothur: Test mothur file import on the (esophagus data).", {
	smlc <- show_mothur_cutoffs(mothlist)
	expect_that(smlc, is_equivalent_to(c("unique", "0.00", "0.01", "0.02", "0.03", "0.04", "0.05", "0.06", "0.07", "0.08", "0.09", "0.10")))	
})

test_that("import_mothur: abundances can be manipulated mathematically", {
	x1 <- as(otu_table(esophman), "matrix")
	expect_that(2*x1-x1, is_equivalent_to(x1) )
})

test_that("import_mothur: empty stuff is NULL", {
	expect_that(tax_table(esophman, FALSE), is_a("NULL"))
	expect_that(sample_data(esophman, FALSE), is_a("NULL"))
})

test_that("import_mothur: Expected classes of non-empty components", {
	expect_that(otu_table(esophman), is_a("otu_table"))
	expect_that(phy_tree(esophman), is_a("phylo"))
})

test_that("import_mothur: imported files become S4 object", {
	expect_that(isS4(esophman), is_true())
})

test_that("import_mothur: show method output tests", {
  expect_output(print(esophman), "phyloseq-class experiment-level object")
})

test_that("Test newer supported mothur output files, constaxonomy and shared files", {
  # Shared file
  sharedOTU = import_mothur(mothur_shared_file=mothshared, cutoff=cutoff)
  expect_is(sharedOTU, "otu_table")
  expect_equivalent(as(sharedOTU[1:5, ], "matrix")[, "B"], c(50, 37, 14, 52, 2))
  expect_equivalent(as(sharedOTU[1, ], "matrix")[1, ], c(50, 19, 5))
  # Constaxonomy file
  tax = import_mothur(mothur_constaxonomy_file=constaxonomy)
  expect_is(tax, "taxonomyTable")
})

################################################################################
# import_RDP tests
test_that("the import_RDP_otu function can properly read gzipped-example", {
  # Setup data
	otufile <- system.file("extdata",
	                       "rformat_dist_0.03.txt.gz",
	                       package="phyloseq")
	ex_otu  <- import_RDP_otu(otufile)	
	# test expectations
	expect_output(print(head(t(ex_otu))), "OTU Table:")
	expect_is(ex_otu, "otu_table")
	expect_equal(ntaxa(ex_otu), 5276)
	expect_equal(nsamples(ex_otu), 14)
	expect_is(sample_sums(ex_otu), "numeric")
})


################################################################################
# import_qiime tests
################################################################################
otufile <- system.file("extdata", "GP_otu_table_rand_short.txt.gz", package="phyloseq")
mapfile <- system.file("extdata", "master_map.txt", package="phyloseq")
trefile <- system.file("extdata", "GP_tree_rand_short.newick.gz", package="phyloseq")
rs_file <- system.file("extdata", "qiime500-refseq.fasta", package="phyloseq")

t0 <- import_qiime(otufile, mapfile, trefile, rs_file, verbose=FALSE)
test_that("Class of import result is phyloseq-class", {
	expect_is(t0, "phyloseq")
})

test_that("Classes of components are as expected", {
	expect_is(otu_table(t0), ("otu_table"))
	expect_is(tax_table(t0), ("taxonomyTable"))
	expect_is(sample_data(t0), ("sample_data"))
	expect_is(phy_tree(t0), ("phylo"))		
	expect_is(refseq(t0), ("DNAStringSet"))
})

test_that("Features of the abundance data are consistent, match known values", {
	expect_equal(sum(taxa_sums(t0)), (1269671L))
	expect_equal(sum(taxa_sums(t0)==0), (5L))
	expect_equal(sum(taxa_sums(t0)>=100), (183L))
	expect_equal(sum(taxa_sums(t0)), (sum(sample_sums(t0))))
	expect_equal(sum(sample_sums(t0) > 10000L), (20L))
	expect_equal(nsamples(t0), (26L))
	expect_equal(ntaxa(t0), (500L))
	expect_equal(length(rank_names(t0)), (7L))
})

test_that("Features of the taxonomy table match expected values", {
	expect_equal(length(rank_names(t0)), (7L))
	expect_equal(rank_names(t0), 
               c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species"))
	tax53 = as(tax_table(t0), "matrix")[53, ]
	expect_equivalent(
	  tax53, 
	  c("Bacteria", "Proteobacteria", "Deltaproteobacteria",
	    "Desulfovibrionales", "Desulfomicrobiaceae", 
	    "Desulfomicrobium", "Desulfomicrobiumorale"))
})
################################################################################
# parse function tests - note, these are also used by import_biom

test_that("Taxonomy vector parsing functions behave as expected", {
		
	chvec1 = c("Bacteria", "Proteobacteria", "Gammaproteobacteria",
		"Enterobacteriales", "Enterobacteriaceae", "Escherichia")
	
	chvec2 = c("k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria",
		"o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__")
	
	chvec3 = c("Root", "k__Bacteria", "p__Firmicutes", "c__Bacilli",
		"o__Bacillales", "f__Staphylococcaceae")
	
	# Example where only some entries have greengenes prefix.
	chvec4 = c("Root", "k__Bacteria", "Firmicutes", "c__Bacilli",
		"o__Bacillales", "Staphylococcaceae", "z__mistake")

	# Even more terrible example, where leading or trailing space characters included
	# (the exact weirdnes of chvec4, compounded by leading and/or trailing space characters)
	chvec5 = c("  Root \n ", " k__Bacteria", "  Firmicutes", " c__Bacilli   ",
		"o__Bacillales  ", "Staphylococcaceae ", "\t z__mistake \t\n")		

	# This should give a warning because there were no greengenes prefixes
	expect_warning(t1 <- parse_taxonomy_greengenes(chvec1))
	# And output from previous call, t1, should be identical to default
	expect_that(parse_taxonomy_default(chvec1), is_equivalent_to(t1))
	
	# All the greengenes entries get trimmed by parse_taxonomy_greengenes
	expect_that(all(sapply(chvec2, nchar) > sapply(parse_taxonomy_greengenes(chvec2), nchar)), is_true())
	# None of the greengenes entries are trimmed by parse_taxonomy_default
	expect_that(any(sapply(chvec2, nchar) > sapply(parse_taxonomy_default(chvec2), nchar)), is_false())
	
	# Check that the "Root" element is not removed by parse_taxonomy_greengenes and parse_taxonomy_default.
	expect_that("Root" %in% chvec3, is_true())
	expect_that("Root" %in% parse_taxonomy_default(chvec3), is_true())
	expect_that(length(parse_taxonomy_default(chvec3)) == length(chvec3), is_true())
	
	# Check that non-greengenes prefixes, and those w/o prefixes, are given dummy rank(s)
	chvec4ranks = names(parse_taxonomy_greengenes(chvec4))
	expect_that(grep("Rank", chvec4ranks, fixed=TRUE), is_equivalent_to(c(1, 3, 6, 7)))
	# Check that everything given dummy rank in default parse.
	chvec4ranks = names(parse_taxonomy_default(chvec4))
	expect_that(grep("Rank", chvec4ranks, fixed=TRUE), is_equivalent_to(1:7))
	
	# chvec4 and chvec5 result in identical vectors.
	expect_that(parse_taxonomy_default(chvec4), is_equivalent_to(parse_taxonomy_default(chvec5)))
	expect_that(parse_taxonomy_greengenes(chvec4), is_equivalent_to(parse_taxonomy_greengenes(chvec5)))	
	
	# The names of chvec5, greengenes parsed, should be...
	correct5names = c("Rank1", "Kingdom", "Rank3", "Class", "Order", "Rank6", "Rank7")
	expect_that(names(parse_taxonomy_greengenes(chvec5)), is_equivalent_to(correct5names))
})

################################################################################
# import_biom tests

rich_dense_biom  <- system.file("extdata", "rich_dense_otu_table.biom",  package="phyloseq")
rich_sparse_biom <- system.file("extdata", "rich_sparse_otu_table.biom", package="phyloseq")
min_dense_biom   <- system.file("extdata", "min_dense_otu_table.biom",   package="phyloseq")
min_sparse_biom  <- system.file("extdata", "min_sparse_otu_table.biom",  package="phyloseq")
# the tree and refseq file paths that are suitable for all biom format style examples
treefilename = system.file("extdata", "biom-tree.phy",  package="phyloseq")
refseqfilename = system.file("extdata", "biom-refseq.fasta",  package="phyloseq")

test_that("Importing biom files yield phyloseq objects", {
	library("biomformat")
	rdbiom = read_biom(rich_dense_biom)
	rsbiom = read_biom(rich_sparse_biom)

	rich_dense = import_biom(rdbiom)
	rich_sparse = import_biom(rsbiom)

	expect_is(rich_dense,  ("phyloseq"))
	expect_is(rich_sparse, ("phyloseq"))
	
	expect_equal(ntaxa(rich_dense), (5L))
	expect_equal(ntaxa(rich_sparse), (5L))

	# # Component classes
	# sample_data
	expect_is(access(rich_dense,  "sam_data"), ("sample_data"))
	expect_is(access(rich_sparse, "sam_data"), ("sample_data"))

	# taxonomyTable
	expect_is(access(rich_dense,  "tax_table"), ("taxonomyTable"))
	expect_is(access(rich_sparse, "tax_table"), ("taxonomyTable"))

	# otu_table
	expect_is(access(rich_dense,  "otu_table"), ("otu_table"))
	expect_is(access(rich_sparse, "otu_table"), ("otu_table"))
})

test_that("The different types of biom files yield phyloseq objects", {
	rich_dense = import_biom(rich_dense_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
	rich_sparse = import_biom(rich_sparse_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
	min_dense = import_biom(min_dense_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
	min_sparse = import_biom(min_sparse_biom, treefilename, refseqfilename, parseFunction=parse_taxonomy_greengenes)
	
	expect_is(rich_dense,  ("phyloseq"))
	expect_is(rich_sparse, ("phyloseq"))
	expect_is(min_dense,   ("phyloseq"))
	expect_is(min_sparse,  ("phyloseq"))

	expect_equal(ntaxa(rich_dense), (5L))
	expect_equal(ntaxa(rich_sparse), (5L))
	expect_equal(ntaxa(min_dense), (5L))
	expect_equal(ntaxa(min_sparse), (5L))			

	# # Component classes
	# sample_data
	expect_is(access(rich_dense,  "sam_data"), ("sample_data"))
	expect_is(access(rich_sparse, "sam_data"), ("sample_data"))
	expect_is(access(min_dense,   "sam_data"), ("NULL"))
	expect_is(access(min_sparse,  "sam_data"), ("NULL"))

	# taxonomyTable
	expect_is(access(rich_dense,  "tax_table"), ("taxonomyTable"))
	expect_is(access(rich_sparse, "tax_table"), ("taxonomyTable"))
	expect_is(access(min_dense,   "tax_table"), ("NULL"))
	expect_is(access(min_sparse,  "tax_table"), ("NULL"))		
	
	# phylo tree
	expect_is(access(rich_dense,  "phy_tree"), ("phylo"))
	expect_is(access(rich_sparse, "phy_tree"), ("phylo"))
	expect_is(access(min_dense,   "phy_tree"), ("phylo"))
	expect_is(access(min_sparse,  "phy_tree"), ("phylo"))

	# reference sequences
	expect_true(inherits(access(rich_dense,  "refseq"), "XStringSet"))
	expect_true(inherits(access(rich_sparse,  "refseq"), "XStringSet"))
	expect_true(inherits(access(min_dense,  "refseq"), "XStringSet"))
	expect_true(inherits(access(min_sparse,  "refseq"), "XStringSet"))
	expect_is(access(rich_dense,  "refseq"), ("DNAStringSet"))
	expect_is(access(rich_sparse, "refseq"), ("DNAStringSet"))
	expect_is(access(min_dense,   "refseq"), ("DNAStringSet"))
	expect_is(access(min_sparse,  "refseq"), ("DNAStringSet"))	
		
	# otu_table		
	expect_is(access(rich_dense,  "otu_table"), ("otu_table"))
	expect_is(access(rich_sparse, "otu_table"), ("otu_table"))
	expect_is(access(min_dense,   "otu_table"), ("otu_table"))
	expect_is(access(min_sparse,  "otu_table"), ("otu_table"))
	
	# Compare values in the otu_table. For some reason the otu_tables are not identical
	# one position is plus-two, another is minus-two
	combrich <- c(access(rich_dense, "otu_table"), access(rich_sparse, "otu_table"))
	expect_equivalent(sum(diff(combrich, length(access(rich_dense, "otu_table")))), (0))
	expect_equivalent(max(diff(combrich, length(access(rich_dense, "otu_table")))), (2))
	expect_equivalent(min(diff(combrich, length(access(rich_dense, "otu_table")))), (-2))
	combmin <- c(access(min_dense, "otu_table"), access(min_sparse, "otu_table"))
	expect_equivalent(sum(diff(combmin, length(access(min_dense, "otu_table")))), (0))
	expect_equivalent(max(diff(combmin, length(access(min_dense, "otu_table")))), (2))
	expect_equivalent(min(diff(combmin, length(access(min_dense, "otu_table")))), (-2))

	expect_equivalent(access(min_dense, "otu_table"), (access(rich_dense, "otu_table")))
	expect_equivalent(access(min_sparse, "otu_table"), (access(rich_sparse, "otu_table")))

	# Compare values in the sample_data
	expect_equivalent(access(rich_dense, "sam_data"), (access(rich_sparse, "sam_data")))
	
	# Compare values in the taxonomyTable
	expect_equivalent(access(rich_dense, "tax_table"), (access(rich_sparse, "tax_table")))

})

test_that("the import_biom and import(\"biom\", ) syntax give same result", {
	x1 <- import_biom(rich_dense_biom, parseFunction=parse_taxonomy_greengenes)
	x2 <- import("biom", BIOMfilename=rich_dense_biom, parseFunction=parse_taxonomy_greengenes)	
	expect_equivalent(x1, x2)
})
################################################################################
# read_tree tests
test_that("The read_tree function works as expected:", {
	GPNewick <- read_tree(system.file("extdata", "GP_tree_rand_short.newick.gz", package="phyloseq"))
	expect_that(GPNewick, is_a("phylo"))
	expect_equal(ntaxa(GPNewick), length(GPNewick$tip.label))
	expect_equal(ntaxa(GPNewick), 500L)
	expect_equal(GPNewick$Nnode, 499L)
	expect_equivalent(taxa_names(GPNewick), GPNewick$tip.label)
	# Now read a nexus tree... 
	# Some error-handling expectations
	expect_that(read_tree("alskflsakjsfskfhas.akshfaksj"), gives_warning()) # file not exist
	not_tree <- system.file("extdata", "esophagus.good.groups.gz", package="phyloseq")
	expect_that(read_tree(not_tree), is_a("NULL")) # file not a tree, gives NULL
	expect_that(read_tree(not_tree, TRUE), throws_error()) # file not a tree, check turned on/TRUE
})
# read_tree_greengenes
test_that("The specialized read_tree_greengenes function works:", {
  # The included, gzipped version of the the 13_5 73% similarity greengenes tree.
  # It causes ape::read.tree to fail with an error, but read_tree_greengenes should be fine.
  treefile = system.file("extdata", "gg13-5-73.tree.gz", package="phyloseq")
  x = read_tree_greengenes(treefile)
  expect_is(x, "phylo")
  # Happen to know that all OTU names should be numbers.
  expect_match(x$tip.label, "[[:digit:]]+")
  # All tip/OTU names should be unique
  expect_false(any(duplicated(taxa_names(x))))
  # The more general read_tree function reads, but they are not equal
  y = read_tree(treefile)
  expect_false(all.equal(x, y))
})
################################################################################
# microbio_me_qiime tests
# This tests different features and expected behavior for
# the functioning of an interface function to the 
# microbio.me/qiime data repository.
#
zipfile = "study_816_split_library_seqs_and_mapping.zip"
zipfile = system.file("extdata", zipfile, package="phyloseq")
tarfile = "study_816_split_library_seqs_and_mapping.tar.gz"
tarfile = system.file("extdata", tarfile, package="phyloseq")
tarps = suppressWarnings(microbio_me_qiime(tarfile))
zipps = suppressWarnings(microbio_me_qiime(zipfile))
# This function is intended to interface with an external server,
# as described in the documentation.
# However, I don't want successful testing of this package to 
# rely on the presence or form of particular files on an 
# external server, so these tests will be done exclusively on 
# compressed file(s) representing what is exposed by the data server
# It is up to the user to provide valid a URL in practice,
# and the function attempts to provide informative status
# and error messages if things go awry.
test_that("The microbio_me_qiime imports as expected: .tar.gz", {
  expect_is(tarps, "phyloseq")
  expect_is(sample_data(tarps, errorIfNULL=FALSE), "sample_data")
  expect_is(otu_table(tarps, errorIfNULL=FALSE), "otu_table")
  expect_identical(nrow(otu_table(tarps)), 50L)
  expect_identical(nrow(sample_data(tarps)), 15L)
})
test_that("The microbio_me_qiime imports as expected: .zip", {  
  expect_is(zipps, "phyloseq")
  expect_is(sample_data(zipps, errorIfNULL=FALSE), "sample_data")
  expect_is(otu_table(zipps, errorIfNULL=FALSE), "otu_table")
  expect_identical(nrow(otu_table(zipps)), 50L)
  expect_identical(nrow(sample_data(zipps)), 15L)
})
test_that("Results of .tar.gz and .zip should be identical", {  
  expect_identical(tarps, zipps)
  expect_identical(sample_data(tarps), sample_data(zipps))
  expect_identical(otu_table(tarps), otu_table(zipps))
})
################################################################################
# import_usearch_uc
################################################################################
usearchfile = system.file("extdata", "usearch.uc", package="phyloseq")
OTU1 = import_usearch_uc(usearchfile)
test_that("import_usearch_uc: Properly omit entries from failed search", {  
  ucLines = readLines(usearchfile)
  expect_identical( sum(OTU1), (length(ucLines) - length(grep("*", ucLines, fixed=TRUE))) )
  expect_identical( nrow(OTU1), 37L)
  expect_identical( nrow(OTU1), nsamples(OTU1))
  expect_identical( ncol(OTU1), ntaxa(OTU1))
  expect_identical( ncol(OTU1), 33L)
  expect_equivalent(colSums(OTU1)[1:6], c(6, 1, 2, 1, 1, 1))
})
################################################################################
joey711/phyloseq documentation built on Nov. 4, 2022, 1:16 a.m.