## inst/tinytest/test_digest.R

## tests for digest, taken from the examples in the manual page

suppressMessages(library(digest))

## Standard RFC 1321 (MD5) test vectors: each input string below is paired,
## position by position, with its expected hex digest in md5Output.
md5Input <-
    c("",
      "a",
      "abc",
      "message digest",
      "abcdefghijklmnopqrstuvwxyz",
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
      paste0("12345678901234567890123456789012345678901234567890123456789012",
             "345678901234567890"))
md5Output <-
    c("d41d8cd98f00b204e9800998ecf8427e",
      "0cc175b9c0f1b6a831c399e269772661",
      "900150983cd24fb0d6963f7d28e17f72",
      "f96b697d7cb7938d525a2f31aaf161d0",
      "c3fcd3d76192e4007dfb496cca67e13b",
      "d174ab98d277d9f5a5611c2c9f419d9f",
      "57edf4a22be3c955ac49da2e2107b67a")

## Scalar interface: hash the string itself (serialize = FALSE), one input at
## a time.  expect_identical() is used rather than expect_true(identical())
## so that a failure reports the mismatching values instead of just "FALSE".
for (i in seq_along(md5Input)) {
    md5 <- digest(md5Input[i], serialize = FALSE)
    expect_identical(md5, md5Output[i])
}

## Vectorised interface: the function returned by getVDigest() (called with
## its default algo) must reproduce the same digests in a single call.
md5 <- getVDigest()
expect_identical(md5(md5Input, serialize = FALSE), md5Output)

## The scalar digest() and the vectorised function stored in `md5` must agree
## on edge-case inputs.  For list inputs the vectorised call receives the
## object wrapped in one extra list(), as written in the original checks.

## NULL input
expect_identical(current = digest(NULL),
                 target  = md5(NULL))

## zero-length character vector (serialized)
expect_identical(current = digest(character(0)),
                 target  = md5(character(0)))

## list holding a single string
expect_identical(current = digest(list("abc")),
                 target  = md5(list(list("abc"))))

## list holding NULL
expect_identical(current = digest(list(NULL)),
                 target  = md5(list(list(NULL))))

## zero-length character vector (not serialized)
expect_identical(current = digest(character(0), serialize = FALSE),
                 target  = md5(character(0), serialize = FALSE))


## md5 raw output test
## With raw = TRUE the digest comes back as a raw vector; converting each byte
## with as.character() yields its two-digit hex form, so collapsing the bytes
## gives the usual hex string without having to capture cat() output.
for (i in seq_along(md5Input)) {
    md5 <- digest(md5Input[i], serialize = FALSE, raw = TRUE)
    md5 <- paste(as.character(md5), collapse = "")
    expect_identical(md5, md5Output[i])
}

## SHA-1 test vectors.
## NOTE: the trailing NULL is dropped by c(), so sha1Input has only two
## elements.  The third entry of sha1Output therefore has no matching input
## and is never exercised, which is why the vectorised check below compares
## against sha1Output[1:2].
sha1Input <-
    c("abc",
      "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
      NULL)
sha1Output <-
    c("a9993e364706816aba3e25717850c26c9cd0d89d",
      "84983e441c3bd26ebaae4aa1f95129e5e54670f1",
      "34aa973cd4c4daa4f61eeb2bdbad27316534016f")

## Scalar interface, one input at a time; expect_identical() reports the
## mismatching values on failure.
for (i in seq_along(sha1Input)) {
    sha1 <- digest(sha1Input[i], algo = "sha1", serialize = FALSE)
    expect_identical(sha1, sha1Output[i])
}

## Vectorised interface over all (two) inputs at once.
sha1 <- getVDigest(algo = "sha1")
expect_identical(sha1(sha1Input, serialize = FALSE), sha1Output[1:2])

## sha1 raw output test
## The raw digest is converted byte-by-byte to two-digit hex and collapsed
## into one string, instead of capturing what cat() prints.
for (i in seq_along(sha1Input)) {
    sha1 <- digest(sha1Input[i], algo = "sha1", serialize = FALSE, raw = TRUE)
    sha1 <- paste(as.character(sha1), collapse = "")
    expect_identical(sha1, sha1Output[i])
}

## sha512 test
## Two input strings paired by position with their expected hex digests.
sha512Input <- c(
    "",
    "The quick brown fox jumps over the lazy dog."
    )
sha512Output <- c(
    "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
    "91ea1245f20d46ae9a037a989f54f1f790f0a47607eeb8a14d12890cea77a1bbc6c7ed9cf205e67b7f2b8fd4c7dfd3a7a8617e45f3c463d481c7e586c39ac1ed")

## Scalar interface, one input at a time; expect_identical() reports the
## mismatching values on failure.
for (i in seq_along(sha512Input)) {
    sha512 <- digest(sha512Input[i], algo = "sha512", serialize = FALSE)
    expect_identical(sha512, sha512Output[i])
}

## Vectorised interface over both inputs at once.
sha512 <- getVDigest(algo = "sha512")
expect_identical(sha512(sha512Input, serialize = FALSE), sha512Output[1:2])

## sha512 raw output test
## The raw digest is converted byte-by-byte to two-digit hex and collapsed
## into one string, instead of capturing what cat() prints.
for (i in seq_along(sha512Input)) {
    sha512 <- digest(sha512Input[i], algo = "sha512", serialize = FALSE, raw = TRUE)
    sha512 <- paste(as.character(sha512), collapse = "")
    expect_identical(sha512, sha512Output[i])
}

## CRC32 test vectors.
## NOTE: the trailing NULL is dropped by c(), so crc32Input has only two
## elements.  The third entry of crc32Output has no matching input and is
## never exercised, which is why the vectorised check below compares against
## crc32Output[1:2].
crc32Input <-
    c("abc",
      "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
      NULL)
crc32Output <-
    c("352441c2",
      "171a3f5f",
      "2ef80172")

## Scalar interface, one input at a time; expect_identical() reports the
## mismatching values on failure.
for (i in seq_along(crc32Input)) {
    crc32 <- digest(crc32Input[i], algo = "crc32", serialize = FALSE)
    expect_identical(crc32, crc32Output[i])
}

## Vectorised interface over all (two) inputs at once.
crc32 <- getVDigest(algo = "crc32")
expect_identical(crc32(crc32Input, serialize = FALSE), crc32Output[1:2])


## one of the FIPS 180 (SHA-1) test vectors: the digest of the string "abc"
sha1 <- digest("abc", algo = "sha1", serialize = FALSE)
expect_identical(sha1, "a9993e364706816aba3e25717850c26c9cd0d89d")

## This one seems to give slightly different output depending on the R version used
##
##                                      # example of a digest of a standard R list structure
## cat(digest(list(LETTERS, data.frame(a=letters[1:5],
##                                     b=matrix(1:10,
##                                     ncol=2)))), "\n")

## these outputs were calculated using xxh32sum
## [ Correction:  These reproduce via the Python xxhash package and its hexdigest() output
##   but not via the command-line tool named in the original comment above. ]
xxhash32Input <-
    c("abc",
      "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
      "")
xxhash32Output <-
    c("32d153ff",
      "89ea60c3",
      "02cc5d05")

## Scalar interface, one input at a time; expect_identical() reports the
## mismatching values on failure.
for (i in seq_along(xxhash32Input)) {
    xxhash32 <- digest(xxhash32Input[i], algo = "xxhash32", serialize = FALSE)
    expect_identical(xxhash32, xxhash32Output[i])
}

## Vectorised interface over all inputs at once.
xxhash32 <- getVDigest(algo = "xxhash32")
expect_identical(xxhash32(xxhash32Input, serialize = FALSE), xxhash32Output)


## these outputs were calculated using xxh64sum
## [ Correction:  These reproduce via the Python xxhash package and its hexdigest() output
##   but not the xxh64sum command-line tool as the original comment here implies. ]
xxhash64Input <-
    c("abc",
      "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
      "")
xxhash64Output <-
    c("44bc2cf5ad770999",
      "f06103773e8585df",
      "ef46db3751d8e999")

## Scalar interface, one input at a time; expect_identical() reports the
## mismatching values on failure.
for (i in seq_along(xxhash64Input)) {
    xxhash64 <- digest(xxhash64Input[i], algo = "xxhash64", serialize = FALSE)
    expect_identical(xxhash64, xxhash64Output[i])
}

## Vectorised interface over all inputs at once.
xxhash64 <- getVDigest(algo = "xxhash64")
expect_identical(xxhash64(xxhash64Input, serialize = FALSE), xxhash64Output)


## these outputs were calculated using mmh3 python package
## the first two are also shown at this StackOverflow question on test vectors
##   https://stackoverflow.com/questions/14747343/murmurhash3-test-vectors
murmur32Input <-
    c("abc",
      "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
      "")
murmur32Output <-
    c("b3dd93fa",
      "ee925b90",
      "00000000")

## Scalar interface, one input at a time; expect_identical() reports the
## mismatching values on failure.
for (i in seq_along(murmur32Input)) {
    murmur32 <- digest(murmur32Input[i], algo = "murmur32", serialize = FALSE)
    expect_identical(murmur32, murmur32Output[i])
}

## Vectorised interface over all inputs at once.
murmur32 <- getVDigest(algo = "murmur32")
expect_identical(murmur32(murmur32Input, serialize = FALSE), murmur32Output)


## tests for digest spooky

## Per PR 205, see comment in https://github.com/facebook/folly/blob/4c603f8c2add8d0228de0e073c5ae3ce9b02b6f3/folly/hash/SpookyHashV2.h#L35-L36
## Values ought to be sensible on big endian too but different from little endian reference
## so we do not test on big endian
if (isTRUE(.Call(digest:::is_little_endian))) {

    ## test vectors (originally for md5)
    spookyInput <- c("",
                     "a",
                     "abc",
                     "message digest",
                     "abcdefghijklmnopqrstuvwxyz",
                     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
                     paste0("12345678901234567890123456789012345678901234567890123456789012",
                            "345678901234567890"))

    ## Reference values below were produced with this Python snippet:
    #
    # from spooky import hash128
    # from binascii import hexlify
    #
    # spookyInput = [
    #     "",
    #       "a",
    #       "abc",
    #       "message digest",
    #       "abcdefghijklmnopqrstuvwxyz",
    #       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
    #       "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
    #     ]
    #
    # for s in spookyInput:
    #     hexlify(hash128(s).to_bytes(16, 'little')).decode()
    #
    # '1909f56bfc062723c751e8b465ee728b'
    # 'bdc9bba09181101a922a4161f0584275'
    # '67c93775f715ab8ab01178caf86713c6'
    # '9630c2a55c0987a0db44434f9d67a192'
    # '5172de938ce149a98f4d06d3c3168ffe'
    # 'b5b3b2d0f08b58aa07f551895f929f81'
    # '3621ec01112dafa1610a4bd23041966b'

    spookyOutputPython <-   c('1909f56bfc062723c751e8b465ee728b',
                              'bdc9bba09181101a922a4161f0584275',
                              '67c93775f715ab8ab01178caf86713c6',
                              '9630c2a55c0987a0db44434f9d67a192',
                              '5172de938ce149a98f4d06d3c3168ffe',
                              'b5b3b2d0f08b58aa07f551895f929f81',
                              '3621ec01112dafa1610a4bd23041966b')

    ## spooky raw output test
    for (i in seq_along(spookyInput)) {
      # skip = 30 skips the entire serialization header for a length 1 character vector
      # this is equivalent to raw = TRUE and matches the python spooky implementation for those vectors
      spooky <- digest(spookyInput[i], algo = "spookyhash", skip = 30)
      expect_identical(spooky, spookyOutputPython[i])
    }

    ## same vectors through the vectorised interface
    expect_identical(
        getVDigest(algo = 'spookyhash')(spookyInput, skip = 30),
        spookyOutputPython
    )

    ## some extras to get coverage up - these aren't tested against reference output,
    ## just output from R 3.6.0
    spookyInput <- c("a", "aaaaaaaaa", "aaaaaaaaaaaaa")
    spookyOutput <- c("b7a3573ba6139dfdc52db30acba87f46",
                      "fd876ecaa5d1e442600333118f223e02",
                      "91848873bf91d06ad321bbd47400a556")
    for (i in seq_along(spookyInput)) {
        spooky <- digest(spookyInput[i], algo = "spookyhash")
        expect_identical(spooky, spookyOutput[i])
    }

    expect_identical(
        getVDigest(algo = 'spookyhash')(spookyInput),
        spookyOutput
    )

    ## test a bigger object; the vectorised call gets iris wrapped in a list
    spooky <- digest(iris, algo = "spookyhash")
    expect_identical(spooky, "af58add8b4f7044582b331083bc239ff")
    expect_identical(getVDigest('spookyhash')(list(iris)),
                     "af58add8b4f7044582b331083bc239ff")

    # test error message (disabled)
    #error.message <- try(digest(spookyInput[i], algo = "spookyhash", serialize = FALSE))
    #expect_true(
    #  grepl("spookyhash algorithm is not available without serialization.", error.message)
    #)
}

## Ensure that all values of algo are actually allowed (in case a new one is
## added in the future). The call to match.arg() passes choices explicitly
## because it is significantly faster to do it than to have it automatically
## infer the possible choices from the function's formals.

# Grab the possible values of algo, then call digest() for each one.
# Each call is wrapped in an expectation so the loop records a result per
# algorithm (with the algorithm name as `info`) instead of asserting nothing.
algos <- eval(formals(digest)[["algo"]])
for (algo in algos) {
  expect_true(is.character(digest(123, algo = algo)), info = algo)
}
# Same for getVDigest, which must hand back a function for every algorithm.
algos <- eval(formals(getVDigest)[["algo"]])
for (algo in algos) {
  expect_true(is.function(getVDigest(algo = algo)), info = algo)
}


## xxhash h3_64 variant
## reference values computed via the Python xxhash package, using its xxh3_64
## object and hexdigest printer:
## ie print(xxhash.xxh3_64("abc").hexdigest())
xxh3_64Input <- c("abc",
                  "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
                  "")
xxh3_64Output <- c("78af5f94892f3950",
                   "5bbcbbabcdcc3d3f",
                   "2d06800538d394c2")

## Scalar interface, one input at a time; expect_identical() reports the
## mismatching values on failure.
for (i in seq_along(xxh3_64Input)) {
    xxh3_64 <- digest(xxh3_64Input[i], algo = "xxh3_64", serialize = FALSE)
    expect_identical(xxh3_64, xxh3_64Output[i])
}

## Vectorised interface over all inputs at once.
xxh3_64 <- getVDigest(algo = "xxh3_64")
expect_identical(xxh3_64(xxh3_64Input, serialize = FALSE), xxh3_64Output)


## xxhash h3_128 variant
## reference values computed via the Python xxhash package, using its xxh3_128
## object and hexdigest printer:
## ie print(xxhash.xxh3_128("abc").hexdigest())
xxh3_128Input <- c("abc",
                   "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
                   "")
xxh3_128Output <- c("06b05ab6733a618578af5f94892f3950",
                    "3d62d22a5169b016c0d894fd4828a1a7",
                    "99aa06d3014798d86001c324468d497f")

## Scalar interface, one input at a time; expect_identical() reports the
## mismatching values on failure.
for (i in seq_along(xxh3_128Input)) {
    xxh3_128 <- digest(xxh3_128Input[i], algo = "xxh3_128", serialize = FALSE)
    expect_identical(xxh3_128, xxh3_128Output[i])
}

## Vectorised interface over all inputs at once.
xxh3_128 <- getVDigest(algo = "xxh3_128")
expect_identical(xxh3_128(xxh3_128Input, serialize = FALSE), xxh3_128Output)

## Exercise the branch taken when `file` is a character path and `object` is
## missing: hashing this test file by name must yield a character result.
## (The relative path assumes the tests run from the directory holding it.)
file_digest <- digest(file = "test_digest.R")
expect_true(is.character(file_digest))

## A streaming algorithm called with serialize = FALSE must raise an error.
no_serialization_msg <- "algorithm is not available without serialization"
expect_error(
    digest(object = "A", algo = "spookyhash", serialize = FALSE),
    pattern = no_serialization_msg
)

## A non-character, non-raw object with a non-streaming algorithm and
## serialize = FALSE must raise an error as well.
bad_type_msg <- "Argument object must be of type character or raw vector if serialize is FALSE"
expect_error(
    digest(object = 1, serialize = FALSE),
    pattern = bad_type_msg
)

## Try the digest package in your browser

## Any scripts or data that you put into this service are public.

## digest documentation built on Nov. 19, 2025, 5:07 p.m.