R/exif_read.R

Defines functions write_utf8 construct_argfile exif_version exif_call exif_read

Documented in exif_call exif_read exif_version

##' Read EXIF and other metadata from files
##'
##' Reads EXIF and other metadata into a \code{data.frame} by calling
##' Phil Harvey's ExifTool command-line application.
##'
##' From the \href{https://exiftool.org}{ExifTool website}:
##' "ExifTool is a platform-independent Perl library plus a
##' command-line application for reading, writing and editing meta
##' information in a wide variety of files. ExifTool supports many
##' different metadata formats including EXIF, GPS, IPTC, XMP, JFIF,
##' GeoTIFF, ICC Profile, Photoshop IRB, FlashPix, AFCP and ID3, as
##' well as the maker notes of many digital cameras by Canon, Casio,
##' DJI, FLIR, FujiFilm, GE, GoPro, HP, JVC/Victor, Kodak, Leaf,
##' Minolta/Konica-Minolta, Motorola, Nikon, Nintendo, Olympus/Epson,
##' Panasonic/Leica, Pentax/Asahi, Phase One, Reconyx, Ricoh, Samsung,
##' Sanyo, Sigma/Foveon and Sony."
##'
##' For more information, see the \href{https://exiftool.org}{ExifTool
##' website}.
##'
##' @param path A vector of filenames.
##' @param tags A vector of tags to output. It is a good idea to
##'     specify this when reading large numbers of files, as it
##'     decreases the output overhead significantly. Spaces will be
##'     stripped in the output data frame. This parameter is not
##'     case-sensitive.
##' @param recursive \code{TRUE} to pass the \code{"-r"} option to
##'     ExifTool.
##' @param args Additional arguments.
##' @param quiet Use \code{FALSE} to display diagnostic
##'     information. Default value is \code{TRUE}
##' @param pipeline One of \code{"json"} (the default) or
##'     \code{"csv"}. Controls whether the exiftool executable, behind
##'     the scenes, extracts metadata into a JSON data structure or a
##'     tabular csv. The JSON pipeline works well in most cases, but
##'     (as documented at
##'     \url{https://exiftool.org/exiftool_pod.html}) does not
##'     properly handle non-UTF-8 character sets. If the metadata
##'     fields include characters that are not encoded using UTF-8 and
##'     that need to be handled by setting the \code{"-charset"}
##'     option, use the \code{"csv"} pipeline as demonstrated in the
##'     second example below.
##' @return A data frame of class \code{"exiftoolr"} with one row per
##'     file processed. The first column, named \code{"SourceFile"}
##'     gives the name(s) of the processed files. Subsequent columns
##'     contain info from the tags read from those files.
##'
##'     Note that binary tags such as thumbnails are loaded as
##'     \href{https://en.wikipedia.org/wiki/Base64}{base64-encoded
##'     strings} that start with \code{"base64:"}. Although these are
##'     truncated in the printed representation of the
##'     \code{data.frame} returned by the function, they are left
##'     unaltered in the \code{data.frame} itself.
##' @references \url{https://exiftool.org}
##' @importFrom jsonlite fromJSON
##' @importFrom data.table fread
##' @export
##'
##' @examples
##' \dontrun{
##' files <- dir(system.file(package = "exiftoolr", "images"),
##'              pattern = "LaSals|Lizard", full.names = TRUE)
##' exif_read(files)
##' exif_read(files, tags = c("filename", "imagesize"))
##'
##' ## Use pipeline="csv" for images needing explicit specification
##' ## and proper handling of a non-default character sets
##' img_file <- system.file(package = "exiftoolr", "images", "QS_Hongg.jpg")
##' args <- c("-charset", "exiftool=cp1250")
##' res <- exif_read(img_file, args = args, pipeline = "csv")
##' res[["City"]]  ## "Zurich", with an umlaut over the "u"
##' }
exif_read <- function(path, tags = NULL,
                      recursive = FALSE,
                      args = NULL,
                      quiet = TRUE,
                      pipeline = c("json", "csv")) {
    pipeline <- match.arg(pipeline)
    ## Ensure that exiftoolr is properly configured
    if (!is_exiftoolr_configured()) {
        configure_exiftoolr(quiet = quiet)
        message("Using ExifTool version ", exif_version(), "\n")
    }

    ## ---- general input processing ----
    ## expand path
    path <- path.expand(path)

    ## check that all files exist (files that do not exist cause
    ## problems later, as do directories without recursive = TRUE)
    if (recursive) {
        missing_dirs <- path[!dir.exists(path)]
        if (length(missing_dirs)) {
            stop("Did you mean recursive = TRUE? ",
                 "The following directories are missing",
                 "(or are not directories): ",
                 paste(missing_files, collapse = ", "))
        }
    } else {
        missing_files <- path[!file.exists(path) | dir.exists(path)]
        if (length(missing_files)) {
            stop("Did you mean recursive = TRUE? ",
                 "The following files are missing (or are not files): ",
                 paste(missing_files, collapse = ", "))
        }
    }

    if (recursive) {
        args <- c(args, "-r")
    }

    ## an extra -q further silences warnings
    if (quiet) {
        args <- c(args, "-q")
    }

    if (!is.null(tags)) {
        ## tags cannot have spaces...whitespace is stripped by ExifTool
        tags <- gsub("\\s", "", tags)
        args <- c(paste0("-", tags), args)
    }

    ##-----------------------------------------##
    ## Process using JSON intermediate output  ##
    ##-----------------------------------------##

    ## required args:
    ##   -n for numeric output
    ##   -j for JSON output
    ##   -q for quiet
    ##   -b to ensure output is base64 encoded
    json_args <- c("-n", "-j", "-q", "-b", args)
    ## Construct and execute a call to Exiftool
    return_value <-
        suppressWarnings(exif_call(args = json_args, path = path))

    ## Handle rare case in which ExifTool finds no files to read
    ## (e.g. https://github.com/JoshOBrien/exiftoolr/issues/20)
    if (!length(return_value)) {
        args <- setdiff(json_args, "-q")
        return_value <-
            suppressWarnings(exif_call(args = args, path = path))
        warning(paste0(return_value, collapse = "\n"))
        return(NULL)
    }

    ## Postprocess the results
    return_value <- fromJSON(paste0(return_value, collapse = ""))


    if (pipeline == "csv") {
        ## Get name and class of each column read in via JSON
        json_names <- names(return_value)
        json_classes <- unname(sapply(return_value, class))
        ## required args:
        ##   -n for numeric output
        ##   -T for tabular output
        ##   -csv for CSV output
        ##   -api filter to properly handle tag values containing commas, double
        ##        quotes, newline characters, or leading or trailing spaces.
        ##   -q for quiet
        ##   -b to ensure output is base64 encoded
        filter <-
          if (.Platform$OS.type == "windows") {
             x <- '$_ = qq($_) if s/""/""""/g or /(^\\s+|\\s+$)/ or /[,\\n\\r]/'
             shQuote(x)
          } else {
             x <- '$_ = qq{"$_"} if s/"/""/g or /(^\\s+|\\s+$)/ or /[,\\n\\r]/'
             sQuote(x)
          }
        filter <- paste0("filter=", filter)
        csv_args <- c("-n", "-T", "-csv", "-q", "-b", "-api", filter, args)
        ## Call Exiftool, writing results to a temp file
        res_file <- tempfile("exif_results")
        on.exit(unlink(res_file))
        exif_call(args = csv_args, path = path, stdout = res_file)
        ## Prepare colClasses, accounting for possibility that the
        ## "csv" pipeline produces more columns than the "json" one
        csv_names <- names(fread(res_file, data.table = FALSE))
        colClasses <- json_classes[match(csv_names, json_names)]
        colClasses[is.na(colClasses)] <- "character"
        ## Read in results using best available column class info
        return_value <- fread(res_file, colClasses = colClasses,
                              data.table = FALSE)
    }

    class(return_value) <- c("exiftoolr", class(return_value))
    return_value
}

##' Call ExifTool from R
##'
##' Uses \code{system2()} to run a basic call to \code{exiftool}.
##' @param args Character vector of arguments, each written in same
##'     form as you would if writing them on the command line
##'     (e.g. \code{"-n"} or \code{"-csv"})
##' @param path A character vector giving one or more file paths.
##' @param stdout Where output to stdout should be sent. If
##'     \code{TRUE} (the default), the output is captured in a
##'     character vector. For other options, see the help file for
##'     \code{\link[base]{system2}}, the function to which this
##'     argument's value gets passed along.
##' @param quiet Use \code{FALSE} to display diagnostic
##'     information. Default value is \code{FALSE}.
##' @param ... Additional arguments to be passed to \code{system2()}.
##' @param config_file Path to a config file of the format expected by
##'     Exiftool's command line \code{-config} option. (See Details
##'     for an explanation of why this one option cannot be passed
##'     directly to \code{args} via the \code{-config} argument.)
##' @param common_args A character vector of arguments to be applied
##'     to all executed commands when the Exiftool \code{-execute}
##'     option is being used. (See Details for an explanation of why
##'     this option cannot be passed directly to \code{args} via
##'     \code{-common_args} argument.)
##' @details For examples of the command-line calls to ExifTool (all
##'     of which can be reproduced by calls to \code{exif_call}), see
##'     \url{https://exiftool.org/examples.html}.
##'
##'     Under the hood, \code{exif_call()} writes the options in
##'     \code{args} to a text file and then calls Exiftool, passing
##'     that text file's contents to Exiftool via its \code{-@
##'     ARGFILE} option. \code{-config} and \code{-common_args} are
##'     the two options that may not be used in such a \code{-@
##'     ARGFILE}, so we handle that option separately using
##'     \code{exif_call()}'s \code{config_file} argument.
##' @return The standard output as a character vector.
##' @export
##'
##' @examples
##' \dontrun{
##' ## Find local ExifTool version using exif_version() or exif_call()
##' exif_version()
##' exif_call(args = "-ver")
##'
##' ## Make temporary copies of a couple jpeg files
##' tmpdir <- tempdir()
##' src_files <- dir(system.file(package = "exiftoolr", "images"),
##'                  full.names = TRUE)
##' files <- file.path(tmpdir, basename(src_files))
##' file.copy(src_files, files)
##'
##' ## Both of the following extract the same tags:
##' exif_read(files, tags = c("filename", "imagesize"))
##' exif_call(args = c("-n", "-j", "-q", "-filename", "-imagesize"),
##'           path = files)
##'
##' ## Set value of a new "Artist" field in photo's metadata
##' file1 <- files[1]
##' exif_read(file1, tags = "artist")
##' exif_call(path = file1, args = "-Artist=me")
##' exif_read(file1, tags = "artist")
##'
##' ## Remove all but a few essential fields
##' length(exif_read(file1))
##' exif_call(path = file1, args = "-all=")
##' length(exif_read(file1))
##' exif_read(file1)
##'
##' ## Clean up
##' unlink(files)
##' }
exif_call <- function(args = NULL,
                      path = NULL,
                      stdout = TRUE,
                      quiet = FALSE,
                      ...,
                      config_file = NULL,
                      common_args = NULL) {
    ## Ensure that exiftoolr is properly configured
    if (!is_exiftoolr_configured()) {
        configure_exiftoolr(quiet = quiet)
        message("Using ExifTool version ", exif_version(), "\n")
    }

    ## Exiftool command
    exiftoolpath <- get_exiftool_command()
    if (is.null(exiftoolpath)) {
        stop("ExifTool not properly installed or configured")
    }

    ## Put all 'command-line' arguments in a file
    argfile <- construct_argfile(args = args, path = path)
    on.exit(unlink(argfile))

    ## Construct and then execute the command-line call
    args <- c("-@", shQuote(argfile))
    ## "-config" must come first and can't be used in an -@ ARGFILE
    if (!is.null(config_file)) {
        args <- c(paste("-config", shQuote(config_file)), args)
    }
    ## "-config" must come first and can't be used in an -@ ARGFILE
    if (!is.null(common_args)) {
        args <- c(args, "-common_args", common_args)
    }
    ## Handle case where exiftoolpath is something like
    ## c("/path/to/perl", "/path/to/exiftool")
    if (length(exiftoolpath) > 1) {
        args <- c(exiftoolpath[-1], args)
        exiftoolpath <- exiftoolpath[1]
    }
    system2(exiftoolpath, args = args, stdout = stdout)
}

##' @rdname exif_call
##' @export
exif_version <- function(quiet = TRUE) {
    exif_call(args = "-ver", quiet = quiet)
}

## private helper command to generate call to exiftool
construct_argfile <- function(args, path) {
    if (any(gl <- grepl("\n", args))) {
        args <- ifelse(gl,
                       paste0("#[CSTR]", gsub("\n", "\\\\n", args)),
                       args)
    }
    all_args <- c(args, path)
    tmpfile <- tempfile("args.cmd")
    write_utf8(all_args, tmpfile)
    tmpfile
}

## Better than `base::writeLines()` when writing UTF-8 `args`
## in a non-Unicode locale such as the "C" locale
## `write_utf8()` from https://github.com/gaborcsardi/rencfaq
## which is under CC0-1.0 Public Domain declaration
## Note by default `exiftool` converts to UTF-8: https://exiftool.org/faq.html#Q10
write_utf8 <- function(text, path) {
    ## Step 1: Ensure our text is utf8 encoded
    utf8 <- enc2utf8(text)
    upath <- enc2utf8(path)

    ## Step 2: Create a connection with 'native' encoding
    ## this signals to R that translation before writing
    ## to the connection should be skipped
    con <- file(upath, open = "w+", encoding = "native.enc")
    on.exit(close(con), add = TRUE)

    ## Step 3: Write to the connection with 'useBytes = TRUE',
    ## telling R to skip translation to the native encoding
    writeLines(utf8, con = con, useBytes = TRUE)
}
JoshOBrien/exiftoolr documentation built on April 1, 2024, 10:16 a.m.