R/images.R

Defines functions plot.pdf_diff_df pdf_diff pdf_split pdf_np image_diff reduce_image

Documented in image_diff pdf_diff pdf_np pdf_split plot.pdf_diff_df reduce_image

##' Reduce Images
##'
##' Copy an image to a new location, preferring an already reduced
##' ("web") version when one is available. No resampling is performed
##' by this function: if a file named \code{web_<basename of from>}
##' exists in the same directory as \code{from}, that file is copied
##' to \code{to}; otherwise \code{from} itself is copied.
##'
##' The copy is made with \code{file.copy()} using its default
##' arguments, and its logical result is not inspected.
##'
##' @param from filename of the original image; the file must exist
##' @param to filename of the reduced image
##' @return invisible NULL
##' @export
reduce_image <- function(from, to) {
    original <- normalizePath(from, mustWork = TRUE)
    ## Candidate low-resolution sibling: same directory, "web_" prefix.
    web_version <- file.path(dirname(original),
                             paste0("web_", basename(original)))
    source_file <- if (file.exists(web_version)) web_version else original
    file.copy(source_file, to)
    invisible(NULL)
}

##' Compare Images
##'
##' Submit a reference image and a new image and get back the
##' difference over all colour channels together with the path to a
##' composite image produced by the comparison.
##'
##' Requires imagemagick. Both files must have the same file
##' extension, and if they are pdf files each must contain exactly
##' one page.
##' @param reference Path to a reference image
##' @param new Path to an image to compare to the reference image
##' @param dir directory to perform the comparison inside
##' @return A list of length two: first, the numeric value parsed
##'     from the parenthesised figure on the "all:" line of the
##'     output of 'compare'; second, the path to the composite image
##'     written by 'compare' inside \code{dir}.
##' @importFrom tools file_ext
image_diff <- function(reference, new, dir = tempdir()) {
    if (system2("convert", "-version", stdout = FALSE) != 0) {
        stop("In order to use this tool you need to install 'imagemagick'")
    }
    fileext1 <- file_ext(reference)
    fileext2 <- file_ext(new)
    stopifnot(fileext1 == fileext2)
    if (fileext1 == "pdf") {
        stopifnot(pdf_np(reference) == 1)
        stopifnot(pdf_np(new) == 1)
    }
    ## The composite image gets the same extension as the inputs.
    output <- tempfile(tmpdir = dir, fileext = paste0(".", fileext1))
    reference <- normalizePath(reference, mustWork = TRUE)
    new <- normalizePath(new, mustWork = TRUE)
    args <- c("-verbose", "-metric MAE")
    ## Both stdout and stderr are captured so that the metric lines
    ## are available whichever stream 'compare' writes them to.
    a <- system2("compare",
                 args = c(args, shQuote(new), shQuote(reference),
                          shQuote(output)),
                 stdout = TRUE,
                 stderr = TRUE)
    all_line <- a[grep("all: ", a)]
    ## Text from the opening parenthesis up to (not including) the
    ## closing one, e.g. "(0.0123".
    ## NOTE(review): with '-metric MAE' this figure looks like a
    ## normalised value rather than a percentage -- confirm against
    ## the ImageMagick documentation.
    value <- regmatches(all_line, regexpr("\\([^\\)]*", all_line))
    if (length(value) == 0) {
        ## Without this check the function would return numeric(0)
        ## and callers would fail later with an unrelated message.
        stop("Unable to find the overall ('all:') difference in the ",
             "output of 'compare':\n",
             paste(a, collapse = "\n"))
    }
    list(as.numeric(gsub("\\(", "", value)),
         output)
}

##' Number of pages in a pdf
##'
##' Look up the page count of a pdf file as reported by
##' \code{pdftools::pdf_info()}.
##' @param path the path to the pdf file; the file must exist
##' @return numeric The number of pages
##' @importFrom pdftools pdf_info
pdf_np <- function(path) {
    pdf_file <- normalizePath(path, mustWork = TRUE)
    info <- pdf_info(pdf_file)
    info$pages
}

##' Split up a pdf
##'
##' Split a pdf into its component pages and return a vector of
##' filenames.
##' @param path The path to the pdf file to split. The file must
##'     exist.
##' @param dir directory for output. Default is to use a temporary
##'     directory. The directory must exist.
##' @return A vector of the filenames of the resultant 1 page pdfs.
pdf_split <- function(path, dir = tempdir()) {
    path <- normalizePath(path, mustWork = TRUE)
    dir <- normalizePath(dir, mustWork = TRUE)
    ## A unique prefix per call keeps the pages of different pdfs that
    ## are split into the same directory from sharing filenames.
    ## NOTE(review): relies on qpdf treating 'output' as the base path
    ## (prefix) of the generated files -- confirm against the qpdf
    ## documentation.
    prefix <- tempfile(pattern = "pages_", tmpdir = dir)
    ## The call must be namespace-qualified: this wrapper has the same
    ## name as the qpdf function, so an unqualified call would resolve
    ## to the wrapper itself and recurse without end. For the same
    ## reason the function is not imported with @importFrom.
    qpdf::pdf_split(path, output = prefix)
}

##' Difference of two pdfs
##'
##' Submit two pdfs with the same number of pages to this function
##' and get back a data.frame with one row per page holding the
##' difference reported by \code{image_diff()} for that page and the
##' path to a composite image of the page produced by the comparison.
##'
##' @title pdf_diff
##' @param reference Path to a pdf file
##' @param new Path to a pdf file
##' @param dir directory to perform the comparison inside
##' @return data.frame A data.frame of class \code{pdf_diff_df} with
##'     5 columns: page, percent_diff, composite, original and
##'     modified. The last two hold the paths of the single page
##'     pdfs that were compared.
##' @export
pdf_diff <- function(reference, new, dir = tempdir()) {
    reference <- normalizePath(reference, mustWork = TRUE)
    new <- normalizePath(new, mustWork = TRUE)
    stopifnot(pdf_np(reference) == pdf_np(new))
    n_pages <- pdf_np(reference)

    message("Splitting the \"reference\" pdf into individual pages")
    ref_pages <- pdf_split(reference, dir)
    message("Splitting the \"new\" PDF into individual pages")
    new_pages <- pdf_split(new, dir)

    message("Comparing individual pages")
    ## Build the one-row result for a single page number.
    compare_page <- function(page_no) {
        result <- image_diff(ref_pages[page_no], new_pages[page_no], dir)
        data.frame(page = page_no,
                   percent_diff = result[[1]],
                   composite = result[[2]],
                   original = ref_pages[page_no],
                   modified = new_pages[page_no],
                   stringsAsFactors = FALSE)
    }
    page_rows <- lapply(seq_len(n_pages), compare_page)
    out <- do.call("rbind", page_rows)

    ## Tag the result so that plot() dispatches to plot.pdf_diff_df().
    class(out) <- c("pdf_diff_df", class(out))
    out
}

##' @title plot.pdf_diff_df
##'
##' A plotting method for the data.frame generated by 'pdf_diff()'.
##' Draws one point per page (difference against page number) and
##' labels each point with its page number.
##'
##' @param x the data.frame generated by pdf_diff()
##' @param ... other arguments to the generic plot function; they are
##'     forwarded to both the \code{plot()} and the \code{text()} call
##' @importFrom graphics plot
##' @importFrom graphics text
##' @export
##' @return A plot
plot.pdf_diff_df <- function(x, ...) {
    page_no <- x$page
    page_diff <- x$percent_diff
    plot(page_no, page_diff,
         type = "p", pch = 20, col = "grey40",
         xlab = "Page number",
         ylab = "Percent difference of page pixels",
         ...)
    ## pos = 4 places each label to the right of its point.
    text(page_no, page_diff, page_no, pos = 4, ...)
}
SVA-SE/mill documentation built on June 21, 2020, 4:09 a.m.