test-utils.R
In dplR: Dendrochronology Program Library in R

context("utility functions")
test.uuid.gen <- function() {
    ## Setup
    SAMP.SIZE <- 100
    ugen <- uuid.gen()
    uuids <- character(SAMP.SIZE)
    for (i in seq_len(SAMP.SIZE)) {
        uuids[i] <- ugen()
    }
    uuids.split <- strsplit(uuids, split="-", fixed=TRUE)
    unique.nchar <- unique(t(sapply(uuids.split, nchar)))
    unique.chars <-
        unique(strsplit(paste(sapply(uuids.split, paste, collapse=""),
                              collapse=""), split=character(0))[[1]])
    all.4 <- unique(substr(uuids, 15, 15))
    one.of.four <- unique(substr(uuids, 20, 20))
    ## Test
    test_that("unique IDs are unique", {
        expect_equal(SAMP.SIZE, length(unique(uuids)))
    })
    test_that("UUIDs have the right parts", {
        expect_true(all(nchar(uuids) == 36))
        expect_true(all(sapply(uuids.split, length) == 5))
        expect_true(nrow(unique.nchar) == 1 &&
                    all(as.vector(unique.nchar) == c(8, 4, 4, 4, 12)))
        expect_true(all(unique.chars %in%
                        c(as.character(0:9), letters[seq_len(6)])))
    })
    test_that("UUID fixed bits are correct", {
        expect_equal(all.4, "4")
        expect_true(all(one.of.four %in% c("8", "9", "a", "b")))
    })
}
test.uuid.gen()

## If 'testDocument' is TRUE, produces a test document to 'con', which may
## be a connection or a filename.
test.latexify <-
    function(testDocument=FALSE,
             con=tempfile(pattern = "latexify", fileext = ".tex"))
{
    ## First part of tests: basic character set

    ## Number of test strings (first part, including one "extra
    ## difficult" case and one empty string)
    SAMP.SIZE <- 50
    stopifnot(SAMP.SIZE >= 2)
    MIN.LENGTH <- 1
    MAX.LENGTH <- 1000
    MAX.NCHAR <- 20 # maximum length of a "word"
    ## All ASCII characters except NUL (0)
    characters <- rawToChar(as.raw(1:127), multiple = TRUE)
    ## LaTeX special characters.  Some of these must be converted to
    ## commands.  Others (e.g. single and double quote, *) are
    ## converted to commands or other characters for improved
    ## compatibility with other packages or to get a particular glyph
    ## (upright quote, centered asterisk) instead of the default
    ## (curved quote, asterisk located higher than the center of the
    ## line).  Some characters would work fine in font encodings other
    ## than OT1 (e.g. <, >, |), but are converted to commands anyway.
    ## The dash is included to prevent -- and --- turning into an n
    ## dash and an m dash, respectively.
    ##
    ## NOTE that the handling (what kind of treatment if any) of some
    ## characters (currently quotes) depends on the (default)
    ## arguments of latexify().
    specialChars <-
        c("{", "}", "\\", "#", "$", "%", "^", "&", "_", "~", "\"", "/", "'",
          "<", ">", "|", "`", "-")
    specialStr <- paste(specialChars, collapse="")
    ## latexify() is designed to convert any sequence of space
    ## characters to a single regular space
    spaceChars <- c("\t", "\n", "\v", "\f", "\r", " ")
    spaceStr <- paste(spaceChars, collapse="")
    ## latexify() is designed to drop control characters excluding spaces
    controlChars <- setdiff(rawToChar(as.raw(c(1:31, 127)), multiple = TRUE),
                            spaceChars)
    controlStr <- paste(controlChars, collapse="")
    ## Decide the length of each test string
    stringLengths <- sample(MIN.LENGTH:MAX.LENGTH, SAMP.SIZE - 2)
    nTotal <- sum(stringLengths)
    ## Create the test strings:
    ## * The last element is a "difficult case".
    ## * The other elements consist of a random sample of characters.
    strStop <- cumsum(stringLengths)
    strStart <- strStop - (stringLengths - 1)
    nSpaces <- round(0.2 * nTotal) # In addition to spaces in 'characters'
    testStrings <-
        c(substring(paste(sample(c(rep(characters,
                                       length.out = nTotal - nSpaces),
                                   rep.int(" ", nSpaces))),
                          collapse=""), strStart, strStop),
          paste(c(specialChars, " ",
                  rev(specialChars), " ",
                  unlist(lapply(lapply(specialChars,rep,3), c, " ")),
                  paste(specialChars, " \t")),
                collapse=""),
          "")

    ## Run latexify() on testStrings
    ltxDouble <- latexify(testStrings, doublebackslash=TRUE)
    ltxSingle <- latexify(testStrings, doublebackslash=FALSE)

    ## Tests
    test_that("doublebackslash works", {
        expect_equal(ltxDouble,
                     gsub("\\", "\\\\",
                          ltxSingle, fixed=TRUE, useBytes=TRUE))
    })
    test_that("no control characters are left", {
        expect_false(any(grepl(sprintf("[%s]", controlStr),
                               ltxSingle, useBytes=TRUE)))
    })
    test_that("space characters collapse", {
        expect_false(any(grepl(sprintf("[%s]{2,}", spaceStr),
                               ltxSingle, useBytes=TRUE)))
    })
    test_that("there are no line breaks", {
        expect_false(any(grepl("\\\\", ltxSingle, fixed=TRUE)))
    })
    Letters <- paste(c(LETTERS, letters), collapse="")
    textCommand <- sprintf("\\\\[%s]+(\\{([^}]|\\\\})+})?", Letters)
    commandTerminated <-
        paste(textCommand,
              "((?<=})|\\{\\}| +|(?=$|[[:digit:],.?!;:\\\\}+*/-]))", sep="")
    commandsAt <- gregexpr(commandTerminated, ltxSingle, perl=TRUE)
    test_that("commands are terminated", {
        expect_equal(lapply(gregexpr(textCommand, ltxSingle), as.vector),
                     lapply(commandsAt, as.vector))
    })
    escape <- sprintf("\\\\[^%s](\\{\\})?", Letters)
    escapesAt <- gregexpr(escape, ltxSingle)

    ## specialMap: record of special character -> command mapping
    specialMap <- vector(mode="list", length = SAMP.SIZE)
    ## Test that each test string was converted properly and prepare
    ## specialMap
    multiSpaceClass <- sprintf("[%s]+", spaceStr)
    controlClass <- sprintf("[%s]", controlStr)
    specialClass <- sprintf("[%s]", specialStr)
    ## Record an ASCII representation of each element in 'testStrings',
    inputDescription <-
        capture.output(invisible(vapply(testStrings, dput, "")))
    for (i in seq_len(SAMP.SIZE)) {
        thisInfo <- inputDescription[i]
        nChars <- nchar(ltxSingle[i])
        ## ltxChars: split ltxSingle[i] into strings representing one
        ## character each
        if (nChars > 0) {
            coms <- commandsAt[[i]]
            escs <- escapesAt[[i]]
            comLengths <- attr(coms, "match.length")
            escLengths <- attr(escs, "match.length")
            if (length(coms) == 1 && coms == -1) {
                coms <- numeric(0)
                comLengths <- numeric(0)
            }
            if (length(escs) == 1 && escs == -1) {
                escs <- numeric(0)
                escLengths <- numeric(0)
            }
            comsAndEscs <- c(coms, escs)
            idx <- order(comsAndEscs)
            comEsc <- comsAndEscs[idx]
            comEscLen <- c(comLengths, escLengths)[idx]
            nComEsc <- length(comEsc)
            charIdx <- numeric(nChars)
            prv <- 0
            prvIdx <- 0
            for (j in seq_along(comEsc)) {
                thisStart <- comEsc[j]
                nSingle <- thisStart - prvIdx - 1
                charIdx[seq(from = prvIdx + 1, length.out = nSingle)] <-
                    seq(from = prv + 1, length.out = nSingle)
                prv <- prv + nSingle + 1
                prvIdx <- thisStart + (comEscLen[j] - 1)
                charIdx[thisStart:prvIdx] <- prv
            }
            nSingle <- nChars - prvIdx
            charIdx[seq(from = prvIdx + 1, length.out = nSingle)] <-
                seq(from = prv + 1, length.out = nSingle)
            ## Each element of ltxChars should represent one character or
            ## a space between words
            strStart <- which(diff(c(0, charIdx)) > 0)
            strStop <- c(strStart[-1] - 1, nChars)
            ## Strip off empty group or spaces following a command
            ltxChars <- sub(sprintf("([%s])(\\{\\}| +)$", Letters), "\\1",
                            substring(ltxSingle[i], strStart, strStop))
        } else {
            ltxChars <- character(0)
        }
        ## stripChars: "Independently" do a part of what latexify()
        ## does, i.e. remove control characters and use single spaces
        ## only
        stripChars <-
            strsplit(gsub(multiSpaceClass,
                          " ",
                          gsub(controlClass,
                               "",
                               testStrings[i])),
                     "")[[1]]
        singleFlag <- nchar(ltxChars) == 1
        multiFlag <- !singleFlag
        specialMap[[i]] <- unique(cbind(stripChars[multiFlag],
                                        ltxChars[multiFlag]))
        ## Compare ltxChars and stripChars
        test_that("normal and special characters are treated correctly", {
            expect_equal(length(stripChars), length(ltxChars),
                         info=thisInfo, tolerance=0)
            expect_false(any(grepl(specialClass,
                                   ltxChars[singleFlag], useBytes=TRUE)),
                        info=thisInfo)
            expect_equal(ltxChars[singleFlag], stripChars[singleFlag],
                         info=thisInfo)
        })
    }
    ## specialMap becomes a combination of the unique rows across its
    ## elements
    specialMap <- do.call(rbind, specialMap)
    specialMap <- unique(specialMap)
    test_that("mapping of special characters is consistent", {
        expect_equal(nrow(specialMap), length(specialChars))
        expect_true(all(specialChars %in% specialMap[, 1]))
    })

    ## Second part of tests: extended character set

    ## A test for handling of different encodings in the input

    ## The following string must have a literal (escaped) backspace
    ## and "x" in front of every "special" character (byte), and each
    ## character code must consist of two hexadecimal digits.
    bytePrint <- "clich\\xe9\\x0ama\\xf1ana" # "\x0a" is a newline

    codeLoc <- as.vector(gregexpr("\\x", bytePrint, fixed=TRUE)[[1]])
    asSuch <- substring(bytePrint,
                        c(1, codeLoc + 4),
                        c(codeLoc - 1, nchar(bytePrint)))
    special <-
        rawToChar(as.raw(paste0("0x", substring(bytePrint,
                                                codeLoc + 2, codeLoc + 3))),
                  multiple = TRUE)
    latin1String <- paste0(asSuch, c(special, ""), collapse="")
    Encoding(latin1String) <- "latin1"
    byteString <- latin1String
    Encoding(byteString) <- "bytes"

    latinConverted <- latexify(latin1String, doublebackslash=FALSE)
    byteConverted <- latexify(byteString, doublebackslash=FALSE)
    test_that("latexify handles different encodings", {
        expect_equal(latinConverted, "clich\\'{e} ma\\~{n}ana")
        expect_equal(latexify(enc2utf8(latin1String), doublebackslash=FALSE),
                     latinConverted)
        expect_equal(tolower(byteConverted),# do hex codes print in lower case?
                     gsub("\\", "\\textbackslash ", bytePrint, fixed=TRUE))
    })

    ## A test for other than default quoting options
    quoteString <- "\"It's five o'clock\", he said."
    res1 <- latexify(quoteString, doublebackslash=FALSE)
    res2 <- latexify(quoteString, quotes="curved", doublebackslash=FALSE)
    res3 <- latexify(quoteString, packages=character(0), doublebackslash=FALSE)
    res4 <- latexify(quoteString, packages="fontenc", doublebackslash=FALSE)
    res5 <- latexify(quoteString, packages="textcomp", doublebackslash=FALSE)
    exp2 <- sub("\"", "\\\\textquotedblright ", quoteString)
    exp2 <- sub("\"", "\\\\textquotedblright", exp2)
    exp4 <- sub("\"", "\\\\textquotedbl ", quoteString)
    exp4 <- sub("\"", "\\\\textquotedbl", exp4)
    exp5 <- gsub("'", "\\\\textquotesingle ", exp2)
    exp1 <- gsub("'", "\\\\textquotesingle ", exp4)
    test_that("quoting options work", {
        expect_equal(res1, exp1, info="Default straight quotes")
        expect_equal(res2, exp2, info="Curved quotes")
        expect_equal(res3, res2, info="Fallback to curved quotes")
        expect_equal(res4, exp4, info="Fallback to curved single quotes")
        expect_equal(res5, exp5, info="Fallback to curved double quotes")
    })
    ## Check that non-ASCII quotes used by dQuote() and sQuote() are
    ## converted to LaTeX commands
    nestQuotes <- paste0("You said, \u201cHe said, ",
                         "\u2018Have a nice day.\u2019\u201d")
    nq <- latexify(nestQuotes, doublebackslash=FALSE)
    test_that("dQuote() and sQuote() are safe", {
        expect_equal(nq,
                     gsub("\\{\\}(?=\\\\)", "",
                          gsub("\u2018", "\\\\textquoteleft ",
                               gsub("\u2019", "\\\\textquoteright",
                                    gsub("\u201c", "\\\\textquotedblleft ",
                                         gsub("\u201d",
                                              "\\\\textquotedblright",
                                              nestQuotes)))),
                          perl=TRUE))
    })
    diaeresisD <- "o\u0308ljysa\u0308ilio\u0308"
    diasD <- latexify(diaeresisD, doublebackslash=FALSE)
    diaeresisC <- "\u00f6ljys\u00e4ili\u00f6"
    diasC <- latexify(diaeresisC, doublebackslash=FALSE)
    test_that("Unicode NFD and NFC are handled the same way", {
        expect_equal(diasD, diasC)
    })
    ## Strings containing practically every non-ASCII character that
    ## will be converted by latexify()
    breakWords <- rep.int(c("space", "vacation", "movie", "line", "break",
                            "rope", "period"), 2)
    allChars <-
        c("\u0132sselmeer is a lake, Berl\u0133n is Dutch for Berlin.",
          paste0("Other digraphs and ligatures: \u01f1, \u01f2, \u01f3, ",
                 "\u01c4, \u01c5, \u01c6, \u01c7, \u01c8, \u01c9, \u01ca, ",
                 "\u01cb, \u01cc, \ufb00, \ufb01, \ufb02, \ufb03, \ufb04, ",
                 "\ufb05, \ufb06"),
          paste0("\u017c\u00f3\u0142ty, p\u0113c, f\u00eate, ",
                 "vis-\u00e0-vis, pi\u00f1ata, Erd\u0151s, ",
                 "\u00c5ngstr\u00f6m, m\u0103r, t\u0159i, \u0203, t\u0331, ",
                 "fa\u00e7ade, \u1e0c, ",
                 "k\u0361p (there should be a ligature tie), t\u0105sa"),
          "Circled: a\u20dd, \u00f1\u20dd",
          "\u00a1Hola! \u00bfQu\u00e9 pasa? \u2e18Verdad\u203d",
          paste0("Money: \u00a3, \u20ac, \u00a2, \u00a5, \u00a4, \u0e3f, ",
                 "\u20a1, \u20ab, \u20b2, \u20a4, \u20a6, \u20b1, \u20a9"),
          paste0("No-Break\u00a0", breakWords, collapse=" "),
          paste0("Do-Break ", breakWords, collapse=" "),
          "visible\u2423space",
          paste0(rep.int("Zero\u200bWidth\u200bSpace\u200b", 10), collapse=""),
          paste0(rep.int(paste0("S\u00ado\u00adf\u00adt\u00ad",
                                "H\u00ady\u00adp\u00adh\u00ade\u00adn\u00ad",
                                "E\u00adv\u00ade\u00adr\u00ady\u00ad",
                                "w\u00adh\u00ade\u00adr\u00ade"), 8),
                 collapse="\u00ad"),
          "Legal \u00a7, \u00a9, \u00ae, \u00b6, \u2117, \u2120, \u2122",
          paste0("Letters \u00c6, \u00e6, \u0152, \u0153, \u00d8, \u00f8, ",
                 "\u0141, \u0142, \u1e9e, \u00df, \u017f, \u00d0, \u00f0, ",
                 "\u0110, \u0111, \u014a, \u014b, \u00de, \u00fe"),
          paste0("Quotes: \u00ab | \u00bb | \u201a | \u201e | \u2039 | \u203a",
                 " | \u2018 | \u2019 | \u201c | \u201d"),
          paste0("mho \u2127, ohm \u03a9, micro \u00b5, Celsius \u2103, ",
                 "degree \u00b0"),
          paste0("Division \u00f7, times \u00d7, fractions \u00bc, \u00bd, ",
                 "\u00be, plusminus \u00b1, root \u221a, minus \u2212, ",
                 "asterisk \u2217, not \u00ac, fraction solidus \u2044, ",
                 "superscript \u00b9, \u00b2, \u00b3"),
          paste0("Discount \u2052, estimated \u212e, \u2116 5, ",
                 "per mille \u2030, parts per ten thousand \u2031"),
          "Bullets: \u2022 Closed, \u25e6 open",
          paste0("Spacing diacritics: double acute \u02dd, acute \u00b4, ",
                 "cedilla \u00b8, breve \u02d8, ",
                 "caron \u02c7, diaeresis \u00a8, macron \u00af"),
          "Arrows: left \u2190, up \u2191, right \u2192, down \u2193",
          "Delimiters: \u3008, \u3009, \u301a, \u301b, \u2045, \u2046",
          paste0("Miscellaneous: feminine ordinal \u00aa, ",
                 "masculine ordinal \u00ba, middle dot \u00b7, ",
                 "daggers \u2020, \u2021, ellipsis \u2026, ",
                 "double vertical line \u2016, large circle \u25ef, ",
                 "blank symbol \u2422, broken bar \u00a6, recipe \u211e, ",
                 "reference mark \u203b, low tilde \u02f7, ",
                 "en dash \u2013, em dash \u2014"))
    ac <- latexify(allChars, doublebackslash=FALSE)
    test_that("a certain set of non-ASCII characters is converted", {
        expect_true(all(Encoding(ac) == "unknown"))
    })
    ## When used independently outside the test suite, the function
    ## can create a test document, but only in a UTF-8 locale.
    if (isTRUE(testDocument) && isTRUE(l10n_info()[["UTF-8"]])) {
        preamble <- c("\\documentclass[a4paper]{article}",
                      "\\usepackage{etoolbox}",
                      "\\usepackage{ifluatex}",
                      "\\usepackage{ifxetex}",
                      "\\usepackage{parskip}",
                      "\\usepackage{textcomp}",
                      "\\ifbool{luatex}{",
                      "\\usepackage{fontspec}",
                      "}{",
                      "\\ifbool{xetex}{",
                      "\\usepackage{fontspec}",
                      "}{",
                      "\\usepackage[T1]{fontenc}",
                      "\\usepackage{lmodern}",
                      "}}")
        id <- c(latin1String, byteString, rep.int(quoteString, 5),
                nestQuotes, diaeresisD, diaeresisC, allChars)
        extraInfo <- c(rep.int("", length(testStrings) + length(latin1String) +
                               length(byteString)),
                       paste0(" (", c("default", "curved", "no packages",
                                      "only fontenc", "only textcomp"), ")"),
                       rep.int("", length(nestQuotes)),
                       rep.int(" (Unicode NFD)", length(diaeresisD)),
                       rep.int(" (Unicode NFC)", length(diaeresisC)),
                       rep.int("", length(allChars)))

        ## Record an ASCII representation of each element in 'id',
        ## add to previous record of 'testStrings'
        inputDescription <- c(inputDescription,
                              capture.output(invisible(vapply(id, dput, ""))))

        allOutput <- c(ltxSingle, latinConverted, byteConverted, res1, res2,
                       res3, res4, res5, nq, diasD, diasC, ac)
        if (is.character(con)) {
            co <- file(con, open = "wt", encoding = "UTF-8")
            on.exit(close(co))
        } else {
            co <- con
            if (!isOpen(co)) {
                open(co, open = "wt")
                on.exit(close(co))
            }
        }
        writeLines(preamble, co)
        writeLines("\\begin{document}", co)
        writeLines("\\begin{enumerate}", co)
        writeLines(paste0("% ", inputDescription, extraInfo, "\n",
                          "\\item\\relax ", allOutput,
                          "% ", seq_along(allOutput)),
                   co, sep = "\n\n")
        writeLines("\\end{enumerate}", co)
        writeLines("\\end{document}", co)
        con
    }
}
test.latexify()

test.latexDate <- function() {
    SAMP.SIZE <- 100
    dates <- Sys.Date() + round(runif(SAMP.SIZE, min = -1000, max = 1000))
    latexDates <- latexDate(dates)
    test_that("length of output equals length of input", {
        expect_equal(length(dates), length(latexDates), tolerance=0)
    })
    splitDates <- strsplit(latexDates, ", ")
    test_that("year is at the end, separated by comma and space", {
        expect_equal(vapply(splitDates, length, numeric(1)),
                     rep.int(2, length(dates)), tolerance=0)
    })
    monthsDays <- vapply(splitDates, "[[", character(1), 1)
    splitDates2 <- strsplit(monthsDays, " ")
    test_that("month and day are separated by space", {
        expect_equal(vapply(splitDates2, length, numeric(1)),
                     rep.int(2, length(dates)), tolerance=0)
    })
    yearStr <- vapply(splitDates, "[[", character(1), 2)
    Months <- match(vapply(splitDates2, "[[", character(1), 1), month.name)
    monthStr <- sprintf("%02.0f", Months)
    Days <- suppressWarnings(as.numeric(vapply(splitDates2,
                                               "[[", character(1), 2)))
    dayStr <- sprintf("%02.0f", Days)
    test_that("output of latexDate(x) represents date x", {
        expect_equal(paste(yearStr, monthStr, dayStr, sep="-"),
                     as.character(dates))
    })
}
test.latexDate()