R/pdf.R

Defines functions samePDF sameBlock trim

Documented in samePDF

## Compare two PDF files
##
## NOTE that this is a "shortcut" test to avoid having to "visually"
## compare by converting to PNG and using ImageMagick's 'compare'.
## If it fails, that is not meant to indicate that the files necessarily
## produce different visual output - the visual compare still occurs.
##
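## NOTE also that "identical" ignores differences in CreationDate,
## ModDate, the PDF minor version, and the R version number in Producer
## (Title and Creator must match the values in 'Rheader' exactly)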
## 
## Returns logical value

## Raw bytes of the decimal digits "0" to "9"; used for header positions
## (dates, version numbers) that may hold any digit
digits <- charToRaw("0123456789")

## Template for an R PDF header.
## A list with one element per header byte; each element is the set of
## raw values that are acceptable at that position.
Rheader <- local({
    ## Fixed text: one single-byte element per character
    exact <- function(text) as.list(charToRaw(text))
    ## A single position that accepts any one of 'bytes'
    oneOf <- function(bytes) list(bytes)
    ## A run of 'n' positions that each accept any digit
    anyDigits <- function(n) rep(list(digits), n)
    ## Fixed bytes that follow the "%" on the second header line
    markerBytes <- as.raw(c(0x81, 0xe2, 0x81, 0xe3, 0x81,
                            0xcf, 0x81, 0xd3, 0x5c, 0x72))
    c(exact("%PDF-1."),
      oneOf(charToRaw("1234567")),
      exact("\n%"),
      as.list(markerBytes),
      exact("\n1 0 obj\n<<\n"),
      exact("/CreationDate (D:"),
      anyDigits(14),
      exact(")\n"),
      exact("/ModDate (D:"),
      anyDigits(14),
      exact(")\n"),
      exact("/Title (R Graphics Output)\n"),
      exact("/Producer (R "),
      oneOf(digits),
      exact("."),
      oneOf(digits),
      exact("."),
      oneOf(digits),
      exact(")\n"),
      exact("/Creator (R)\n"),
      exact(">>\n"))
})

## Consume the leading bytes of 'con' for as long as they match an R PDF
## header template.
##
## 'con' is a connection open for binary reading.
## 'header' is a list with one element per header position; each element
## is a raw vector of the byte values allowed at that position.  It
## defaults to 'Rheader'.
##
## Returns the first byte read that is not part of the matched header:
## either the first byte that fails to match the template, or the first
## byte after a completely matched header.  The result is empty
## (raw(0)) if the connection runs out of bytes first.
trim <- function(con, header = Rheader) {
    nHeader <- length(header)
    index <- 1
    byte <- readBin(con, "raw")
    ## Only read ahead after a successful match, so that a byte that
    ## breaks the match is handed back to the caller for comparison
    ## instead of being silently discarded
    while (length(byte) > 0 && index <= nHeader &&
           byte %in% header[[index]]) {
        index <- index + 1
        byte <- readBin(con, "raw")
    }
    byte
}

## Are two blocks of bytes the same?
## Two empty blocks are the same; blocks of different lengths are not;
## otherwise every element must compare equal.
sameBlock <- function(block1, block2) {
    len1 <- length(block1)
    len2 <- length(block2)
    if (len1 != len2) {
        return(FALSE)
    }
    if (len1 == 0) {
        return(TRUE)
    }
    all(block1 == block2)
}

## Compare two PDF files to see if they are the same
## Ignore details like creation date in PDF files that are generated by R
##
## 'file1' and 'file2' are passed to file() and opened for binary reading.
##
## Returns a logical value: TRUE if, after any R PDF header has been
## consumed from each file, the same number of bytes was consumed and all
## remaining bytes are the same; FALSE otherwise.
samePDF <- function(file1, file2) {
    ## Register each close() as soon as its connection is open, so that
    ## 'con1' is still closed if opening 'file2' fails
    con1 <- file(file1, "rb")
    on.exit(close(con1), add = TRUE)
    con2 <- file(file2, "rb")
    on.exit(close(con2), add = TRUE)
    ## Consume any R PDF header
    ## (including unwanted differences like creation date)
    block1 <- trim(con1)
    block2 <- trim(con2)
    ## seek() checks that we have trimmed both files by the same amount
    equal <- seek(con1) == seek(con2) && sameBlock(block1, block2)
    ## Compare the rest of the files in blocks of (up to) 1000 bytes;
    ## stop at the first difference or when either file is exhausted
    while (equal && length(block1) && length(block2)) {
        block1 <- readBin(con1, "raw", 1000)
        block2 <- readBin(con2, "raw", 1000)
        equal <- sameBlock(block1, block2)
    }
    equal
}

Try the gdiff package in your browser

Any scripts or data that you put into this service are public.

gdiff documentation built on April 1, 2023, 12:11 a.m.