# TODO: re-do docs
# TODO: Check and remove extraneous code
# TODO: recurse file listing for e.g. remote ZIP file
# TODO: readtext with csv doesn't seem to require text_field
# Return the document variables of `x`: every column after the first two.
# The tests in this file use it on readtext results, whose first two columns
# are compared separately (see the doc_id/text checks below).
# `drop = FALSE` keeps the result a data.frame even when only one column
# remains, instead of silently collapsing it to a bare vector.
dvars <- function(x) as.data.frame(x)[, -(1:2), drop = FALSE]
# Reading one plain-text file gives a single text named after the file.
test_that("test readtext with single filename", {
  expected_text <- c(fox.txt = "The quick brown fox jumps over the lazy dog.")
  single_file <- readtext("../data/fox/fox.txt")
  expect_equal(as.character(single_file), expected_text)
})
# Calling texts() on a readtext object must raise the deprecation warning
# that points users to as.character(). The message is matched literally
# (fixed = TRUE), including its embedded newline.
test_that("test for deprecated warning for text()", {
  expect_warning(
    texts(readtext("../data/fox/fox.txt")),
    "'texts.readtext' is deprecated.\nUse 'Use as.character() instead' instead.",
    fixed = TRUE
  )
})
# A character vector of paths is read as one text per path.
test_that("test readtext with vector of filenames", {
  fruit_files <- c("../data/fruits/apple.txt", "../data/fruits/orange.txt")
  fruit_texts <- as.character(readtext(fruit_files))
  expect_equal(length(fruit_texts), 2)
})
# Glob patterns (`*`, `?`) in the path select the expected number of files.
test_that("test readtext with glob-style mask", {
  # Number of texts read for a given glob pattern
  count_texts <- function(pattern) {
    length(as.character(readtext(pattern)))
  }

  expect_equal(count_texts("../data/glob/*.txt"), 5)
  expect_equal(count_texts("../data/glob/?.txt"), 4)

  # Glob in non-last part of filename
  expect_equal(count_texts("../data/glob/*/test.txt"), 2)

  # It would be nice to be able to test that an escaped glob does not treat
  # special characters in a filename as wildcards, but R check won't allow a
  # file of this name to exist in the package... This should still pass if you
  # run the test manually (having created the file, supposing your platform
  # supports it)
  # expect_equal(
  #     length(as.character(readtext(
  #             "../data/glob/special/\\*.txt"
  #     ))),
  #     1
  # )
})
# A glob over the test*.csv fixtures yields 6 rows in total.
test_that("test structured readtext with glob-style mask", {
  expect_equal(
    nrow(readtext("../data/csv/test*.csv", text_field = "text")),
    6
  )
})
# A plain-text file can be read straight from an https URL.
test_that("test remote text file", {
  # Requires network access, which is not reliably available on CRAN.
  skip_on_cran()

  fox_url <- "https://raw.githubusercontent.com/kbenoit/readtext/master/tests/data/fox/fox.txt"
  expected <- c(fox.txt = "The quick brown fox jumps over the lazy dog.")

  expect_equal(as.character(readtext(fox_url)), expected)

  # ignore_missing_files with an existing file should make no difference
  expect_equal(
    as.character(readtext(fox_url, ignore_missing_files = TRUE)),
    expected
  )

  # ignores parameters in URL
  expect_equal(
    as.character(readtext(paste0(fox_url, "?x=1&y=2"),
                          ignore_missing_files = TRUE)),
    expected
  )
})
# A CSV file can be read straight from an https URL, one text per row.
test_that("test remote csv file", {
  # Requires network access, which is not reliably available on CRAN.
  skip_on_cran()

  csv_url <- "https://raw.githubusercontent.com/kbenoit/readtext/master/tests/data/csv/test.csv"
  expect_equal(
    as.character(readtext(csv_url, text_field = "text")),
    c(test.csv.1 = "Lorem ipsum.", test.csv.2 = "Dolor sit")
  )
})
context("test that require recursive invocation of listFileNames (i.e. because a special filename resolves to another special filename)")

# The zip archive shipped in the package's extdata directory yields 36 texts.
test_that("test zip file", {
  skip_on_cran()
  skip_on_os("windows")

  extdata_dir <- system.file("extdata/", package = "readtext")
  zip_path <- paste0(extdata_dir, "/data_files_encodedtexts.zip")
  zip_texts <- as.character(readtext(zip_path))

  expect_equal(length(zip_texts), 36)
})
# Files with an unknown (or no) extension trigger a warning.
test_that("test warning for unrecognized filetype", {
  # Regex for the warning expected for a given extension
  unsupported_pattern <- function(extension) {
    paste0("Unsupported extension ", sQuote(extension), " of file *")
  }

  expect_warning(
    readtext("../data/empty/empty.nonesuch"),
    unsupported_pattern("nonesuch")
  )
  expect_warning(
    readtext("../data/unknown/unknown"),
    unsupported_pattern("")
  )

  # But test that it still loads
  unknown_file <- readtext("../data/unknown/unknown", verbosity = 0)
  expect_equal(
    unknown_file$text,
    "The quick brown fox jumps over the lazy dog."
  )
})
# TODO: Refactor this to loop over filetypes
# CSV input: docvars, texts, and errors for a bad text_field.
test_that("test csv files", {
  csv_corpus <- readtext("../data/csv/test.csv", text_field = "text")

  # Docvars are the columns after doc_id and text
  expected_docvars <- data.frame(
    list(colour = c("green", "red"), number = c(42L, 99L))
  )
  expect_identical(dvars(csv_corpus), expected_docvars)

  expected_texts <- c(test.csv.1 = "Lorem ipsum.", test.csv.2 = "Dolor sit")
  expect_equal(as.character(csv_corpus), expected_texts)

  # A text_field that does not exist is an error, by name or by position
  expect_error(
    readtext("../data/csv/*", text_field = "nonesuch"),
    "There is no field called"
  )
  expect_error(
    readtext("../data/csv/*", text_field = 9000),
    "There is no 9000th field"
  )
})
# .tab input: docvars, texts, and the error for a missing text_field.
test_that("test tab files", {
  tab_path <- "../data/tab/test.tab"
  tab_corpus <- readtext(tab_path, text_field = "text")

  expected_docvars <- data.frame(
    list(colour = c("green", "red"), number = c(42, 99)),
    stringsAsFactors = FALSE
  )
  expect_equal(dvars(tab_corpus), expected_docvars)

  expected_texts <- unname(
    c(test.tab.1 = "Lorem ipsum.", test.tab.2 = "Dolor sit")
  )
  expect_equal(tab_corpus$text, expected_texts)

  expect_error(
    readtext(tab_path, text_field = "nonexistent"),
    "There is no field called nonexistent"
  )
})
# .tsv input: docvars, texts, and the error for a missing text_field.
# Uses expect_equal() directly rather than the deprecated
# expect_that(..., equals()) wrapper, which performs the same comparison.
test_that("test tsv files", {
  testcorpus <- readtext("../data/tsv/test.tsv", text_field = "text")
  expect_equal(
    as.data.frame(testcorpus)[, -c(1:2)],
    data.frame(list(colour = c("green", "red"), number = c(42, 99)),
               stringsAsFactors = FALSE)
  )
  expect_equal(
    as.character(testcorpus),
    c(test.tsv.1 = "Lorem ipsum.", test.tsv.2 = "Dolor sit")
  )
  expect_error(
    readtext("../data/tsv/test.tsv", text_field = "nonexistant"),
    "There is no field called nonexistant"
  )
})
# XML input: docvars, texts, doc ids, numeric text_field, and error cases.
test_that("test xml files", {
  xml_path <- "../data/xml/test.xml"
  expected_ids <- c("test.xml.1", "test.xml.2")
  expected_texts <- c("Lorem ipsum.", "Dolor sit")

  # Test corpus object
  testcorpus <- readtext(xml_path, text_field = "text")
  expected_docvars <- data.frame(
    list(colour = c("green", "red"), number = c(42, 99)),
    stringsAsFactors = FALSE
  )
  expect_equal(data.frame(testcorpus[, -c(1, 2)]), expected_docvars)
  expect_equal(unname(as.character(testcorpus)), expected_texts)
  expect_identical(testcorpus$doc_id, expected_ids)

  # Selecting the text field by position works but warns
  expect_warning(
    readtext(xml_path, text_field = 1),
    "You should specify text_field by name.*"
  )
  by_position <- suppressWarnings(readtext(xml_path, text_field = 1))
  expect_identical(unname(as.character(by_position)), expected_texts)
  expect_identical(testcorpus$doc_id, expected_ids)

  # A text_field that does not exist is an error, by name or by position
  expect_error(
    readtext(xml_path, text_field = "nonesuch"),
    "There is no field called"
  )
  expect_error(
    readtext(xml_path, text_field = 9000),
    "There is no 9000th field"
  )
})
# For XML files, text_field may be an XPath expression; matched nodes are
# joined with the `collapse` string when one is given.
test_that("test xml files with XPath", {
  tei_path <- "../data/xml/tei.xml"
  paragraph_xpath <- "/d1:TEI/d1:text/d1:body//d1:p"

  plain <- readtext(tei_path, text_field = paragraph_xpath)
  #  namespaces = c(tei = "https://www.tei-c.org/ns/1.0"))
  expect_equal(as.character(plain), c(tei.xml = "The quick brown fox"))

  joined <- readtext(tei_path, collapse = "P", text_field = paragraph_xpath)
  #  namespaces = c(tei = "https://www.tei-c.org/ns/1.0"))
  expect_equal(unname(as.character(joined)), "The Pquick Pbrown Pfox")

  all_text_nodes <- readtext(tei_path, collapse = "P",
                             text_field = "/d1:TEI//*/text()")
  #  namespaces = c(tei = "https://www.tei-c.org/ns/1.0"))
  expect_equal(
    unname(as.character(all_text_nodes)),
    "Lorem Ipsum 1PSome PlacePAnywhere, USPNopePThe Pquick Pbrown PfoxPNope"
  )
})
# Docvars derived from file names: default and custom separators, custom
# docvar names, the warnings when too few names are supplied, and error cases.
# Comparisons use expect_equal() directly rather than the deprecated
# expect_that(..., equals()) wrapper, which performs the same comparison.
test_that("test readtext() with docvarsfrom = filenames", {
  expect_equal(
    dvars(readtext("../data/docvars/one/*", docvarsfrom = "filenames")),
    data.frame(list(docvar1 = c(1L, 2L), docvar2 = c("apple", "orange")),
               stringsAsFactors = FALSE)
  )

  expect_equal(
    dvars(readtext("../data/docvars/dash/*", docvarsfrom = "filenames",
                   dvsep = "-")),
    data.frame(list(docvar1 = c(1, 2), docvar2 = c("apple", "orange")),
               stringsAsFactors = FALSE)
  )

  expect_equal(
    dvars(readtext("../data/docvars/two/*txt", docvarsfrom = "filenames")),
    data.frame(list(docvar1 = c(1, 2), docvar2 = c("apple", "orange")),
               docvar3 = c("red", "orange"),
               stringsAsFactors = FALSE)
  )

  expect_error(
    readtext("../data/docvars/two/*json", text_field = "nonesuch",
             docvarsfrom = "filenames"),
    "There is no field called"
  )

  expect_error(
    readtext("../data/docvars/unequal/*", docvarsfrom = "filenames"),
    "Filename elements are not equal in length."
  )

  expect_equal(
    dvars(readtext("../data/docvars/two/*txt", docvarsfrom = "filenames",
                   docvarnames = c("id", "fruit", "colour"))),
    data.frame(list(id = c(1, 2), fruit = c("apple", "orange")),
               colour = c("red", "orange"), stringsAsFactors = FALSE)
  )

  # Fewer names than docvars: the remaining docvars get generic names
  expect_warning(
    dvars(readtext("../data/docvars/two/*txt", docvarsfrom = "filenames",
                   docvarnames = c("id", "fruit"))),
    "Fewer docnames supplied than existing docvars - last 1 docvar given generic names."
  )
  expect_equal(
    dvars(suppressWarnings(readtext("../data/docvars/two/*txt",
                                    docvarsfrom = "filenames",
                                    docvarnames = c("id", "fruit")))),
    data.frame(list(id = c(1, 2), fruit = c("apple", "orange")),
               docvar3 = c("red", "orange"), stringsAsFactors = FALSE)
  )
  expect_warning(
    dvars(readtext("../data/docvars/two/*txt", docvarsfrom = "filenames",
                   docvarnames = c("id"))),
    "Fewer docnames supplied than existing docvars - last 2 docvars given generic names."
  )

  #TODO: What happens if you supply more docnames?

  expect_error(
    dvars(readtext("../data/docvars/two/*txt", docvarsfrom = "nonesuch"))
  )

  # Docvars from both metadata and filename
  expect_equal(
    dvars(readtext("../data/docvars/csv/*", docvarsfrom = c("filenames"),
                   docvarnames = c("id", "fruit"), text_field = "text")),
    data.frame(list(shape = c("round", NA), texture = c(NA, "rough"),
                    id = c(1, 2), fruit = c("apple", "orange")),
               stringsAsFactors = FALSE)
  )

  # # Docvars from both metadata and filename
  # expect_equal(
  #     dvars(readtext("../data/docvars/json/*", docvarsfrom = c("filenames", "metadata"), docvarnames = c("id", "fruit"), text_field = "text")),
  #     data.frame(list(id = c(1, 2), fruit = c("apple", "orange"), shape = c("round", NA), texture = c(NA, "rough")), stringsAsFactors = FALSE)
  # )
})
# A plain-text file has no docvars: the result is a one-row, zero-column
# data.frame.
test_that("test docvars.readtext warning with field != NULL", {
  no_docvars <- data.frame(x = 1)[, 0, drop = FALSE]
  fox_docvars <- dvars(readtext("../data/fox/fox.txt"))
  expect_identical(fox_docvars, no_docvars)
})
# Four files with two encodings is neither length 1 nor one-per-file, so it
# must be rejected.
test_that("test that readtext encoding argument must be either length 1 or same length as the number of files", {
  four_files <- rep("../data/fox/fox.txt", 4)
  two_encodings <- c("utf-8", "utf-8")
  expect_error(
    readtext(four_files, encoding = two_encodings),
    "Encoding parameter must be length 1, or as long as the number of files"
  )
})
context("Loading a corpus from a zip archive")

# The inaugural zip fixture yields 57 rows.
test_that("A single-level zip file containing txt files can be loaded", {
  inaugural <- readtext("../data/zip/inauguralTopLevel.zip")
  expect_equal(nrow(inaugural), 57)
})
context("Loading an empty gzipped tar archive")

# Reading the empty archive fixture fails with a "does not exist" error.
test_that("An empty tar.gz file raises an error", {
  empty_archive <- "../data/empty/test.tar.gz"
  expect_error(
    readtext(empty_archive),
    "File '../data/empty/test.tar.gz' does not exist"
  )
})
# CSV files with different docvar columns are combined; columns missing from
# a file are filled with NA.
test_that("test reading structured text files with different columns", {
  fruit_corpus <- readtext("../data/fruits/*.csv", text_field = "text")

  expected_docvars <- data.frame(
    list(
      color = c("green", "orange", NA, NA),
      shape = c(NA, NA, "round", "long")
    ),
    stringsAsFactors = FALSE
  )
  expect_equal(dvars(fruit_corpus), expected_docvars)

  expected_texts <- c(
    "1.csv.1" = "apple",
    "1.csv.2" = "orange",
    "2.csv.1" = "apple",
    "2.csv.2" = "banana"
  )
  expect_equal(as.character(fruit_corpus), expected_texts)
})
context("Tests of new readtext internals. If these fail, it doesn't necessarily affect the exposed API")
context("Tests for list_files")

# Exercises the internal list_files(): URL scheme checks, existing and missing
# local files, vectors of files, and glob patterns (including in directories).
# Scratch files are created through readtext:::get_temp() and removed again at
# the end so the test leaves nothing behind.
test_that("Test function to list files", {
  expect_error(
    readtext:::list_files("nonesuch://example.org/test.txt"),
    "Unsupported URL scheme"
  )

  existing_file <- readtext:::get_temp()
  file.create(existing_file)
  expect_equal(readtext:::list_files(existing_file), existing_file)
  expect_equal(
    readtext:::list_files(paste0("file://", existing_file)),
    existing_file
  )

  # Test vector of filenames
  existing_file_2 <- readtext:::get_temp()
  file.create(existing_file_2)
  expect_equal(
    readtext:::list_files(c(existing_file, existing_file_2)),
    sort(c(existing_file, existing_file_2))
  )

  # TODO: Test vector of filename and URL
  expect_equal(
    readtext:::list_files(c(existing_file, existing_file_2)),
    sort(c(existing_file, existing_file_2))
  )

  # Once the file is gone, listing it errors unless ignore_missing is set
  file.remove(existing_file)
  expect_error(
    readtext:::list_files(existing_file),
    "File '' does not exist"
  )
  expect_equal(
    readtext:::list_files(existing_file, ignore_missing = TRUE),
    character(0)
  )

  # Test globbing
  # (named glob_dir rather than tempdir to avoid masking base::tempdir())
  glob_dir <- readtext:::get_temp(directory = TRUE)
  file.create(file.path(glob_dir, "1.tsv"))
  file.create(file.path(glob_dir, "2.tsv"))
  file.create(file.path(glob_dir, "10.tsv"))

  expect_equal(
    length(readtext:::list_files(paste0(glob_dir, "/", "*.tsv"))),
    3
  )
  expect_equal(
    length(readtext:::list_files(paste0(glob_dir, "/", "?.tsv"))),
    2
  )
  expect_error(
    length(readtext:::list_files(paste0(glob_dir, "/", "?.txt"))),
    "File '' does not exist"
  )

  # Test globbing subdir
  glob_subdir_1 <- readtext:::get_temp(temp_dir = glob_dir, directory = TRUE)
  glob_subdir_2 <- readtext:::get_temp(temp_dir = glob_dir, directory = TRUE)
  file.create(file.path(glob_subdir_1, "1.tsv"))
  file.create(file.path(glob_subdir_1, "2.tsv"))
  file.create(file.path(glob_subdir_2, "1.tsv"))

  expect_equal(
    length(readtext:::list_files(paste0(glob_dir, "/", "*/", "?.tsv"))),
    3
  )

  expect_error(
    readtext:::list_files("http://example.org/test.nonesuch"),
    "Remote URL does not end in known extension."
  )

  # Clean up the remaining scratch file and the scratch directory tree
  unlink(existing_file_2)
  unlink(glob_dir, recursive = TRUE)
})
# A remote URL that returns 404 is an error for list_files(), but readtext()
# tolerates it when ignore_missing_files = TRUE.
test_that("Test function to list files with remote sources", {
  skip_on_cran()

  missing_url <- "https://www.google.com/404.txt"
  expect_error(readtext:::list_files(missing_url), ".*404.*")

  tolerated <- readtext(missing_url, ignore_missing_files = TRUE)
  expect_equal(dim(tolerated), c(1, 2))
})
# Text names come from the source file names, regardless of input kind
# (single file, globbed CSVs, globbed text files, tar archive).
test_that("text vectors have names of the files they come from by default (bug 221)", {
  # Names of the texts read from `...`
  text_names <- function(...) names(as.character(readtext(...)))

  expect_equal(text_names("../data/fox/fox.txt"), "fox.txt")

  csv_names <- text_names("../data/csv/test*.csv", text_field = "text")
  expected_csv_names <- c(
    "test.csv.1", "test.csv.2",
    "test2.csv.1", "test2.csv.2",
    "test3.csv.1", "test3.csv.2"
  )
  expect_true(setequal(expected_csv_names, csv_names))

  glob_names <- text_names("../data/glob/*.txt")
  expected_glob_names <- c("1.txt", "2.txt", "3.txt", "4.txt", "10.txt")
  expect_true(setequal(expected_glob_names, glob_names))

  tar_names <- text_names("../data/tar/test.tar")
  expected_tar_names <- c("test.txt", "test2.txt", "test3.txt", "test4.txt")
  expect_true(setequal(expected_tar_names, tar_names))
})
# A glob that matches a tar archive reads the texts inside it.
test_that("test globbed tar file", {
  skip_on_cran()
  skip_on_os("linux")

  tar_texts <- unname(as.character(readtext("../data/tar/*")))
  expect_equal(
    tar_texts,
    c("Lorem ipsum", "brown fox", "Dolor sit", "The quick")
  )
})
# An HTML5 file is reduced to its text content.
test_that("test html file", {
  expected <- c("html5.html" = "The quick brown fox \njumps over the lazy dog")
  html_text <- as.character(readtext("../data/html/html5.html"))
  expect_equal(html_text, expected)
})
# A malformed HTML5 file is still read (not run on Windows).
test_that("test malformed html file", {
  skip_on_os("windows")

  expected <- c(
    "malformed_html5.html" = "The quick brown fox \n \njumps over the lazy dog"
  )
  html_text <- as.character(readtext("../data/html/malformed_html5.html"))
  expect_equal(html_text, expected)
})
# Printing the text read from the PDF fixture shows the fox sentence
# (not run on Windows).
test_that("test for pdf file", {
  skip_on_os("windows")

  printed_pattern <- "The quick brown fox jumps over the lazy dog\n"
  expect_output(
    cat(as.character(readtext("../data/pdf/test.pdf"))),
    printed_pattern
  )
})
# An OpenDocument text file is read as a single named text.
test_that("test for odt file", {
  expected <- c(test.odt = "The quick brown fox jumps over the lazy dog")
  odt_text <- as.character(readtext("../data/odt/test.odt"))
  expect_equal(odt_text, expected)
})
# A .docx file is read as a single named text.
test_that("test for docx file", {
  expected <- c(test.docx = "The quick brown fox jumps over the lazy dog")
  docx_text <- as.character(readtext("../data/docx/test.docx"))
  expect_equal(docx_text, expected)
})
# A legacy .doc file is read as ten repetitions of the fox sentence once
# newlines are replaced by spaces (not run on Windows).
test_that("test for doc file", {
  skip_on_os("windows")

  sentence <- c("The quick brown fox jumps over the lazy dog.")
  expected <- c(test.doc = paste(rep(sentence, 10), collapse = " "))

  raw_texts <- as.character(readtext("../data/doc/test.doc"))
  # The names are saved and re-applied after the replacement
  flattened <- stringi::stri_replace_all_regex(raw_texts, "\\n", " ")
  names(flattened) <- names(raw_texts)

  expect_equal(flattened, expected)
})
# JSON input: texts and docvars across several files, error cases for
# text_field, and the Twitter-specific reader.
test_that("test json files", {
  skip_on_cran()
  skip_on_os("linux")

  expect_equal(
    unname(as.character(readtext("../data/json/test*json",
                                 text_field = "text"))),
    c("Lorem ipsum", "Dolor sit", "The quick", "brown fox", "Now is the winter")
  )

  #  test.json and test2.json are newline-delimited json
  #  test3.json is a single json object
  expected_docvars <- data.frame(list(
    colour = c("green", "red", "orange", "blue", NA),
    number = c(42, 99, 0, NA, 3)),
    stringsAsFactors = FALSE)
  # Sort both frames by `number` and reset row names so the comparison does
  # not depend on the order in which the files were read.
  expected_docvars <- expected_docvars[order(expected_docvars$number), ]
  row.names(expected_docvars) <- NULL
  actual_docvars <- dvars(readtext("../data/json/test*json",
                                   text_field = "text"))
  actual_docvars <- actual_docvars[order(actual_docvars$number), ]
  row.names(actual_docvars) <- NULL
  expect_equal(
    actual_docvars,
    expected_docvars
  )

  expect_error(
    readtext("../data/json/test*json", text_field = 1),
    "Cannot use numeric text_field with json file"
  )

  expect_error(
    readtext("../data/json/test3.json", text_field = "nonesuch"),
    "There is no field called nonesuch in file"
  )

  # Twitter json files
  tweetSource <- readtext("../data/tweets/stream.json", source = "twitter")
  expect_equal(
    as.character(tweetSource),
    c(stream.json.1 = "I jumped over the lazy @dog", stream.json.2 = "Yawn")
  )
  expect_equal(
    dvars(tweetSource)$statuses_count,
    c(16204, 200)
  )
  expect_equal(
    dvars(tweetSource)$screen_name,
    c("foxxy", "dog")
  )
})
# Only defined on unix-alikes: reading a directory path picks up the 7 files
# in the fruits fixture folder.
if (.Platform$OS.type == "unix") {
  test_that("test readtext with folder", {
    folder_corpus <- readtext("../data/fruits")
    expect_equal(length(folder_corpus$text), 7)
  })
}
context("Loading a corpus from a tar archive")

# The tar fixture contains four short texts.
test_that("A single-level tar file containing txt files can be loaded", {
  skip_on_cran()
  skip_on_os("linux")

  tar_texts <- unname(as.character(readtext("../data/tar/test.tar")))
  expect_equal(
    tar_texts,
    c("Lorem ipsum", "brown fox", "Dolor sit", "The quick")
  )
})
context("Loading a corpus from a gzipped tar archive")

# The tar.gz fixture contains the same four short texts.
test_that("A single-level tar.gz file containing txt files can be loaded", {
  skip_on_cran()
  skip_on_os("linux")

  targz_texts <- unname(as.character(readtext("../data/targz/test.tar.gz")))
  expect_equal(
    targz_texts,
    c("Lorem ipsum", "brown fox", "Dolor sit", "The quick")
  )
})
context("Loading a corpus from a bzipped tar archive")

# The tar.bz fixture contains the same four short texts
# (skipped on Linux and Windows).
test_that("A single-level tar.bz file containing txt files can be loaded", {
  skip_on_cran()
  skip_on_os("linux")
  skip_on_os("windows")

  tarbz_texts <- unname(as.character(readtext("../data/tarbz/test.tar.bz")))
  expect_equal(
    tarbz_texts,
    c("Lorem ipsum", "brown fox", "Dolor sit", "The quick")
  )
})
context("Tests for verbosity argument")

# The unsupported-extension warning is raised at the default verbosity and at
# levels 3, 2 and 1; verbosity = 0 is completely silent.
test_that("test warning for unrecognized filetype", {
  odd_file <- "../data/empty/empty.nonesuch"
  warning_pattern <- paste0(
    "Unsupported extension ", sQuote("nonesuch"), " of file"
  )

  expect_warning(readtext(odd_file), warning_pattern)
  for (level in c(3, 2, 1)) {
    expect_warning(readtext(odd_file, verbosity = level), warning_pattern)
  }

  expect_silent(readtext(odd_file, verbosity = 0))
})
# Progress messages appear only from verbosity 2 upwards.
test_that("messages from list_file", {
  zip_path <- "../data/zip/inauguralTopLevel.zip"

  # Levels 0 and 1 print nothing
  expect_silent(readtext(zip_path, verbosity = 0))
  expect_silent(readtext(zip_path, verbosity = 1))

  # Level 2 announces the source being read
  expect_message(
    readtext(zip_path, verbosity = 2),
    "Reading texts from \\.\\./data/zip/inauguralTopLevel\\.zip"
  )

  # Level 3 additionally reports individual files
  expect_message(
    readtext(zip_path, verbosity = 3),
    "reading \\(txt\\) file: .*1789-Washington\\.txt"
  )
})
# text_field reads all 6 rows; the old `textfield` spelling warns that it is
# deprecated.
test_that("readtext called with textfield works with deprecation warning", {
  csv_glob <- "../data/csv/test*.csv"

  expect_equal(nrow(readtext(csv_glob, text_field = "text")), 6)
  expect_equal(nrow(dvars(readtext(csv_glob, text_field = "text"))), 6)
  expect_equal(
    length(as.character(readtext(csv_glob, text_field = "text"))),
    6
  )

  expect_warning(
    readtext(csv_glob, textfield = "text"),
    "textfield is deprecated; use text_field instead"
  )
})
# Excel input (.xlsx and .xls): texts and docvars.
# Comparisons use expect_equal() directly rather than the deprecated
# expect_that(..., equals()) wrapper, which performs the same comparison.
test_that("tests for Excel files", {
  expect_equal(
    unname(as.character(
      readtext("../data/xls/test.xlsx", text_field = "text"))),
    c("The quick", "brown fox", "jumps over", "the lazy dog.")
  )
  expect_equal(
    dvars(readtext("../data/xls/test.xlsx", text_field = "text")),
    data.frame(list(
      colour = c("orange", "blue", "pink", "pink"),
      number = c(0, NA, NA, NA),
      taste = c(NA, NA, "sweet", "umami")
    ), stringsAsFactors = FALSE)
  )

  expect_equal(
    as.character(readtext("../data/xls/test.xls", text_field = "text")),
    c("test.xls.1" = "The quick", "test.xls.2" = "brown fox",
      "test.xls.3" = "jumps over", "test.xls.4" = "the lazy dog.")
  )
  expect_equal(
    dvars(readtext("../data/xls/test.xls", text_field = "text")),
    data.frame(list(
      colour = c("orange", "blue", "pink", "pink"),
      number = c(0, NA, NA, NA),
      taste = c(NA, NA, "sweet", "umami")
    ), stringsAsFactors = FALSE)
  )
})
# ODS input: texts, then the complete readtext object including its class
# and row names.
test_that("tests for ODS files", {
  ods_corpus <- readtext("../data/ods/test.ods", text_field = "text")
  expect_equal(
    unname(as.character(ods_corpus)),
    c("The quick", "brown fox", "jumps over", "the lazy dog.")
  )

  expected_object <- structure(
    list(
      doc_id = c("test.ods.1", "test.ods.2", "test.ods.3", "test.ods.4"),
      text = c("The quick", "brown fox", "jumps over", "the lazy dog."),
      colour = c("orange", "blue", "pink", "pink"),
      number = c(0L, NA, NA, NA),
      taste = c(NA, NA, "sweet", "umami")
    ),
    row.names = c(NA, -4L),
    class = c("readtext", "data.frame")
  )
  expect_identical(
    readtext("../data/ods/test.ods", text_field = "text"),
    expected_object
  )
})
# `source` must be a character value naming a source supported for the file
# type: "nexis" for HTML, "twitter" for json.
test_that("rases error when source is not valid", {
  nexis_file <- "../data/nexis/sun_2000-11-01_0001.html"
  tweet_file <- "../data/tweets/stream.json"

  # HTML
  expect_error(
    readtext(nexis_file, source = 1),
    "source must be a character"
  )
  expect_error(
    readtext(nexis_file, source = "something"),
    "'nexis' is the only source type available for HTML."
  )
  expect_silent(readtext(nexis_file, source = "nexis"))

  # json
  expect_error(
    readtext(tweet_file, source = 1),
    "source must be a character"
  )
  expect_error(
    readtext(tweet_file, source = "something"),
    "'twitter' is the only source type available for json"
  )
  expect_silent(readtext(tweet_file, source = "twitter"))
})
# CSV files with one or two columns, with and without explicit text_field /
# docid_field, produce the expected doc_id and text columns.
test_that("readtext works with one-column csv files (#138)", {
  foo_bar <- c("foo foo foo foo", "bar bar bar bar")

  one_column <- data.frame(
    doc_id = paste("data_onecol.csv", 1:2, sep = "."),
    text = foo_bar,
    stringsAsFactors = FALSE
  )
  expect_equivalent(readtext("../data/csv/data_onecol.csv"), one_column)

  two_columns <- data.frame(
    doc_id = paste("data_twocol.csv", 1:2, sep = "."),
    text = foo_bar,
    y = 1:2,
    stringsAsFactors = FALSE
  )
  expect_equivalent(readtext("../data/csv/data_twocol.csv"), two_columns)

  # Column y supplies the document ids, column x the text
  explicit_fields <- data.frame(
    doc_id = c("1", "2"),
    text = foo_bar,
    stringsAsFactors = FALSE
  )
  expect_equivalent(
    readtext("../data/csv/data_twocol.csv", text_field = "x",
             docid_field = "y"),
    explicit_fields
  )
})
# RTF input: both fixture files are read to their plain text.
# (Previously labelled "tests for ODS files", which duplicated the name of the
# real ODS test and misreported failures.)
test_that("tests for RTF files", {
  expect_identical(
    unname(as.character(readtext("../data/rtf/*.rtf"))),
    c("The quick brown fox jumps over the lazy dog",
      "This is an example of “rich text” format.")
  )
})
# docid_field selects the column used for document ids across formats;
# without it, a message points out an existing doc_id column, and an unknown
# docid_field is an error.
test_that("tests for files with doc_id", {
  three_docs <- c(doc1 = "The quick", doc2 = "brown fox", doc3 = "jumped over")

  # csv
  expect_identical(
    as.character(readtext("../data/csv/withdocid.csv",
                          docid_field = "doc_id", text_field = "text")),
    three_docs
  )
  csv_default_ids <- as.character(
    readtext("../data/csv/withdocid.csv", text_field = "text")
  )
  expect_identical(csv_default_ids[1], c(withdocid.csv.1 = "The quick"))

  # ods
  expect_identical(
    as.character(readtext("../data/ods/withdocid.ods",
                          docid_field = "doc_id", text_field = "text")),
    three_docs
  )

  # xls
  expect_identical(
    as.character(readtext("../data/xls/withdocid.xls",
                          docid_field = "doc_id", text_field = "text")),
    three_docs
  )

  # json
  expect_identical(
    as.character(readtext("../data/json/withdocid.json",
                          docid_field = "doc_id", text_field = "text")),
    c(doc1 = "Lorem ipsum", doc2 = "Dolor sit")
  )

  expect_message(
    readtext("../data/csv/withdocid.csv", text_field = "text"),
    'A field called "doc_id" exists in the file. If you intend to use it as a document identifier, use "docid_field" option.'
  )

  # Unknown docid_field, by name or by position
  expect_error(
    readtext("../data/xls/withdocid.xls", docid_field = "nonesuch"),
    "There is no field called nonesuch"
  )
  expect_error(
    readtext("../data/xls/withdocid.xls", docid_field = 9000),
    "There is no 9000th field"
  )
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.