exiftool <- function(fname, output="list", attr_filter=NULL, dedup=TRUE, verbose=FALSE) {
# Run 'exiftool' command line program on a file and return the results
# Arguments:
# fname (char or char vec): filepath or vector of filepaths to run exiftool on
# output {char} -- format to return data as
# options:
# list: return as list in format:
# list(
# fname1 = exif,
# fname2 = exif,
# ...)
# where 'exif' is a named character vector
# df: regular dataframe where columns are every possible EXIF attribute, one row per fname
# data.frame(fname=..., key1=..., key2=..., ...)
# df_melt: melted dataframe with only id variable, key and value columns
# data.frame(fname=..., key=..., val=...)
# json: json string version of df_melt
# [
# {
# "fname":"...",
# "key":"...",
# "val":"..."
# },
# ...
# ]
# list_json: list of json objects
# list(
# fname1=[{"key1":"val1","key2":"val2",...}],
# fname2=[{"key1":"val1","key2":"val2",...}],
# ...
# )
# attr_filter {char}: attribute or attributes to filter exiftool output for
# dedup {logical} -- If TRUE, names of EXIF attributes will be checked for duplicate names. If
# any are found, a suffix is appended. Suffixes may be "_2", "_3", ...
# verbose {logical} -- If TRUE, verbose output with timestamps printed to console
#
# Returns:
# exiftool data in desired output format: {list}, {dataframe}, {json}
# Program functions
splitAt <- function(x, pos) unname(split(x, cumsum(seq_along(x) %in% pos)))
split_fnames_char_limit <- function(fnames, char_limit) {
# Determine at which point to split vector of filenames to comply with arbitrary char limit
# Arguments:
# fnames (string or character vector): filepath or vector of filepaths
# char_limit {numeric} -- arbitrary character limit of command line command
#
# Returns:
# list
split_fname = c()
count = 0
str_lengths = nchar(fnames)
for (i in 1:length(str_lengths)) {
val = str_lengths[i] + 3 # Account for two double quotes and a space
count = count + val
if (count > char_limit - nchar("exiftool ")) {
split_fname = append(split_fname, i-1)
count = 0
}
}
return(splitAt(fname, split_fname))
}
get_exif_list <- function(fname_batches, attr_filter, dedup, verbose) {
# Iterate over batches and parse EXIF keys and values
# Arguments:
# fname_batches: fname_batches
# attr_filter: attr_filter
#
# Returns:
# {list}
suppressPackageStartupMessages(require(stringr))
if (verbose) echo("Running exiftool...", timestamp=TRUE, fn_name="exiftool")
# out = lapply(fname_batches, function(fname_vec, attr_filter) {
out = lapply(1:length(fname_batches), function(i, attr_filter, dedup) {
fname_vec = fname_batches[[i]]
fname_string = paste0(sprintf('"%s"', fname_vec), collapse = " ")
cmd = sprintf('exiftool %s', fname_string)
res = system(cmd, intern=TRUE)
split_loc = grep("^========", res)
res[split_loc] = NA
res = splitAt(res, split_loc)
res = lapply(res, function(x) x[!is.na(x)])
keys = lapply(res, function(x) tolower(gsub(" ", "_", trimws(gsub("^(.*?)(:)(.*)$", "\\1", x)))))
vals = lapply(res, function(x) trimws(gsub("^(.*?)(:)(.*)$", "\\3", x)))
# Filter result if any attributes specified
if (!is.null(attr_filter)) {
vals = lapply(1:length(vals), function(i) vals[[i]][keys[[i]] %in% attr_filter])
keys = lapply(keys, function(x) x[x %in% attr_filter])
}
# Apply names to each char vector
vals = lapply(1:length(vals), function(i) {
names(vals[[i]]) = keys[[i]]
return(vals[[i]])
})
# Mark any duplicate keys with a _\d suffix to ensure no duplicate keys
if (dedup == TRUE) {
vals = lapply(vals, function(x) {
suffix = 2
while (any(duplicated(names(x)))) {
names(x)[duplicated(names(x))] = paste0(names(x)[duplicated(names(x))], "_", suffix)
suffix = suffix + 1
}
# Account for cases with more than 2 duplicates. The above while loop will change
# a vector of c("focal_length", "focal_length", "focal_length") to
# c("focal_length", "focal_length_2", "focal_length_2_3")
# names(x) = gsub("\\_(\\d)\\_", "_", names(x))
# Commented out because it also changes a vector like
# c("face_1_position", "face_2_position") to c("face_position", "face_position")
return(x)
})
}
if (verbose) {
echo("Batch %s of %s complete (size: %s files)",
str_pad(i, width=nchar(max(length(fname_batches))), side="left", pad="0"),
length(fname_batches),
length(fname_vec),
indent=1, timestamp=TRUE, fn_name="exiftool")
}
return(vals)
}, attr_filter, dedup)
out = do.call(c, out)
names(out) = unlist(fname)
return(out)
}
format_output <- function(out, output) {
# Format results as desired output format
# Arguments:
# out {dataframe} -- dataframe to format
# output {char} -- desired output format, one of "list", "df", "list_json", "df_melt"
#
# Returns:
# out in output format
if (output == "list") {
return(out)
} else if (output == "df") {
seq_max = seq_len(max(sapply(out, length)))
mat = t(sapply(out, "[", i=seq_max))
mat = as.data.frame(cbind(rownames(mat), mat))
mat = data.frame(lapply(mat, as.character), stringsAsFactors=FALSE)
rownames(mat) = NULL
colnames(mat)[1] = "fname"
return(mat)
} else if (output == "list_json") {
require(jsonlite)
list_json = lapply(out, function(vec) {
df_json = data.frame(key=names(vec), val=unname(vec), stringsAsFactors=FALSE)
df_json = as.data.frame(t(df_json))
df_json = data.frame(lapply(df_json, as.character), stringsAsFactors=FALSE)
colnames(df_json) = as.character(df_json[1,])
rownames(df_json) = NULL
df_json = df_json[2,]
return(toJSON(df_json))
})
return(list_json)
} else if (output %in% c("df_melt", "json")) {
df = do.call(rbind, lapply(names(out), function(name) {
exif = out[name][[1]]
data.frame(
fname = name,
key = names(exif),
val = unname(exif),
stringsAsFactors=FALSE)
}))
if (output == "df_melt") {
return(df)
} else if (output == "json") {
require(jsonlite)
return(toJSON(df))
}
}
}
# Check parameters
stopifnot(is.character(fname) & length(fname) > 0)
# Run exiftool on each file, format as specified in function parameters
if (verbose) echo("Number of files detected : %s", length(fname), timestamp=TRUE, fn_name="exiftool")
est_time = fmt_seconds(0.06079523*length(fname), round_digits=0)
if (verbose) echo("Expected program time : %s %s", est_time$value, est_time$units, timestamp=TRUE, fn_name="exiftool")
char_limit = as.numeric(system("getconf ARG_MAX", intern=TRUE)) - 25000
if (verbose) echo("Using command-line character limit : %s", char_limit, timestamp=TRUE, fn_name="exiftool")
fname_batches = split_fnames_char_limit(fname, char_limit)
if (verbose) echo("Number of batches to run : %s", length(fname_batches), timestamp=TRUE, fn_name="exiftool")
out = get_exif_list(fname_batches, attr_filter, dedup, verbose)
out = format_output(out, output)
# If only run on a single file, and output is a list, return only named chr
if (class(out) == "list" && length(out) == 1) {
return(out[[1]])
} else {
return(out)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.